In [28]:
import os
import re
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.ticker import FuncFormatter

In [10]:
# Reads a run log and returns every metric logged for the requested evaluation space
def readRunResults(runPath, eval_space=None, log_file="logging.log"):
    """Parse the evaluation log of one run into a flat vector of 15 values.

    Parameters
    ----------
    runPath : str
        Directory of the run; the log file is looked up below it.
    eval_space : {None, 'latent', 'concept', 'hybrid'}
        Evaluation space whose metrics are wanted. With None, only the lines
        that precede the first "... Space Evaluation" header are used.
    log_file : str
        Log file name inside "model_best". It is not used for the paths that
        map to "model_best.pth.tar/logging.log".

    Returns
    -------
    numpy.ndarray of shape (15,)
        [0:5]   the five values of the "meanr" line, text-to-video
        [5]     text-to-video mAP, multiplied by 100
        [6]     text-to-video recall sum
        [7:12], [12], [13]   the same three groups for video-to-text
        [14]    res[0] + res[1] + res[2] + res[7] + res[8] + res[9]
        Values that never appear in the log stay at 0.
    """
    # True while the lines being read belong to the requested evaluation space.
    target = eval_space is None
    if re.search("test_results", runPath) or (re.search("runs_0", runPath) and eval_space is None):
        logger = "model_best.pth.tar/logging.log"
    else:
        logger = "model_best/%s" % log_file
    res = np.zeros(15)
    with open(os.path.join(runPath, logger)) as f:
        for line in f:
            # A space header switches the "are we in the wanted section" flag.
            # The second pattern matches a misspelled header.
            if re.search("Space Evaluation", line) or re.search("Space Evalutaion", line):
                if re.search("Latent", line): target = eval_space == 'latent'
                if re.search("Concept", line): target = eval_space == 'concept'
                if re.search("Hybrid", line): target = eval_space == 'hybrid'
            # Direction headers select which half of `res` the next metrics fill.
            # `offset` is only bound once one of them has been read.
            if re.search("Text to Video", line): offset = 0
            if re.search("Video to text", line): offset = 7
            if not target:
                continue
            # Metric lines count only inside the wanted section. The former
            # `target or ...` test tried to parse every line of that section.
            if re.search("meanr", line):
                values = line.split("[")[-1].split("]")[0].split(",")
                res[offset:offset+5] = [float(value) for value in values]
            if re.search("mAP", line):
                res[offset+5] = float(line.split()[2])
            if re.search("recall sum", line):
                res[offset+6] = float(line.split()[3])
    res[[5,12]] *= 100
    res[14] = res[0]+res[1]+res[2]+res[7]+res[8]+res[9]

    return(res)

# Sort key: the integer that follows the first "s_" in a run directory name
def sortKey(name):
    """Return the run number embedded in `name` (e.g. "runs_12" -> 12)."""
    pieces = name.split("s_")
    runNumber = int(pieces[1])
    return runNumber

# Read all runs of one experiment, keeping only the columns used in "TPAMI" style tables
def readExpResults(expPath):
    """Return an array with one row of 11 selected metrics per run found in `expPath`.

    Directory entries matching the pattern ".ipynb" are skipped, the remaining
    ones are ordered with sortKey, and every row is printed before returning.
    """
    keptColumns = [0, 1, 2, 3, 5, 7, 8, 9, 10, 12, 14]
    runList = []
    for entry in os.listdir(expPath):
        if re.search(".ipynb", entry):
            continue
        runList.append(entry)
    runList.sort(key=sortKey)
    rows = []
    for runName in runList:
        runMetrics = readRunResults(os.path.join(expPath, runName))
        rows.append(runMetrics[keptColumns])
    expRes = np.array(rows)
    with np.printoptions(linewidth=120, precision=2, floatmode="fixed"):
        for position in range(len(runList)):
            print(runList[position], expRes[position])
    print("")
    return expRes

# LaTeX formatting
def latexAvgTableLine(expName, expRes):
    """Format one LaTeX table row holding the per-column average of `expRes`.

    With more than one run the row shows "<name> (<run count>)" and the column
    means with two decimals. Otherwise the values of the first row are shown
    with one decimal, columns 3 and 8 being printed as integers.
    """
    nRuns = expRes.shape[0]
    if nRuns > 1:
        row = np.average(expRes, axis=0)
        template = ("%s (%d) & %5.02f & %5.02f & %5.02f & %5.02f & %5.02f && %5.02f & %5.02f "
                    "& %5.02f & %5.02f & %5.02f & %6.02f \\\\")
        head = (expName, nRuns)
    else:
        row = expRes[0]
        template = ("%s & %5.01f & %.01f & %5.01f & %5d & %5.01f && %5.01f & %5.01f & %5.01f "
                    "& %5d & %5.01f & %6.01f \\\\")
        head = (expName,)
    cells = tuple(row[k] for k in range(11))
    return template % (head + cells)

def latexStdTableLine(expName, expRes):
    """Format one LaTeX table row with the sample standard deviation of each column.

    The deviation uses ddof=1. With a single run the row contains only dashes.
    """
    nRuns = expRes.shape[0]
    if nRuns <= 1:
        return "%s & - & - & - & - & - && - & - & - & - & - & - \\\\" % (expName)
    spread = np.std(expRes, axis=0, ddof=1)
    template = ("%s (%d) & %5.02f & %5.02f & %5.02f & %5.02f & %5.02f && %5.02f & %5.02f "
                "& %5.02f & %5.02f & %5.02f & %6.02f \\\\")
    cells = tuple(spread[k] for k in range(11))
    return template % ((expName, nRuns) + cells)

def latexTableLineZ(expName1, expRes1, expName2, expRes2, diffName):
    """Format two experiment averages and their z-score row as three LaTeX lines.

    For every column the score is
        z = (avg2 - avg1) / sqrt(std1**2 / n1 + std2**2 / n2)
    with sample standard deviations (ddof=1) and n1, n2 the run counts.
    Columns 3 and 8 are shown as "-" in the z row.

    Parameters
    ----------
    expName1, expName2 : str
        Row labels of the two experiments.
    expRes1, expRes2 : numpy.ndarray
        One row of 11 metrics per run.
    diffName : str
        Label of the z-score row.

    Returns
    -------
    str
        Three newline-separated table rows.

    Raises
    ------
    ValueError
        If either experiment has fewer than two runs, because the sample
        standard deviation is undefined. This used to surface as an
        UnboundLocalError on the return statement.
    """
    n1, n2 = expRes1.shape[0], expRes2.shape[0]
    if n1 < 2 or n2 < 2:
        raise ValueError("latexTableLineZ needs at least two runs per experiment "
                         "(got %d for %s and %d for %s)" % (n1, expName1, n2, expName2))
    avg1 = np.average(expRes1, axis=0)
    avg2 = np.average(expRes2, axis=0)
    std1 = np.std(expRes1, axis=0, ddof=1)
    std2 = np.std(expRes2, axis=0, ddof=1)
    z = (avg2-avg1)/np.sqrt(std1**2/n1+std2**2/n2)
    avgTemplate = ("%s (%d) & %5.02f & %5.02f & %5.02f & %5.02f & %5.02f && %5.02f & %5.02f "
                   "& %5.02f & %5.02f & %5.02f & %6.02f \\\\")
    zTemplate = ("%s      & %+5.01f & %+5.01f & %+5.01f &   -   & %+5.01f && %+5.01f & %+5.01f "
                 "& %+5.01f &   -   & %+5.01f & %+6.01f \\\\")
    lines = avgTemplate % ((expName1, n1) + tuple(avg1[k] for k in range(11)))
    lines = lines + "\n" + avgTemplate % ((expName2, n2) + tuple(avg2[k] for k in range(11)))
    # No z value is printed for columns 3 and 8.
    lines = lines + "\n" + zTemplate % (diffName, z[0], z[1], z[2], z[4], z[5], z[6], z[7], z[9], z[10])
    return(lines)

In [29]:
# Reads a run log and returns every metric logged for the requested evaluation space
def readRunResults(runPath, target=None, eval_space=None, log_file="logging.log"):
    """Parse the evaluation log of one run into a flat vector of 15 values.

    Parameters
    ----------
    runPath : str
        Directory of the run. The log is "model_best/<log_file>" below it, or
        "logging.log" directly in `runPath` when that file does not exist.
    target : {None, 'latent', 'concept', 'hybrid'}
        Evaluation space whose metrics are wanted. None selects the lines that
        precede the first "... Space Evaluation" header.
    eval_space : str or None
        Space assumed to be active before any header is read. For callers
        written against the older signature, which pass only `eval_space=...`
        to name the wanted space, a call without `target` treats `eval_space`
        as the target.
    log_file : str
        Log file name inside "model_best". It is always honoured; the former
        special case for "test_results" / "runs_0" paths ignored it.

    Returns
    -------
    numpy.ndarray of shape (15,)
        [0:5]   the five values of the "meanr" line, text-to-video
        [5]     text-to-video mAP, multiplied by 100
        [6]     text-to-video recall sum
        [7:12], [12], [13]   the same three groups for video-to-text
        [14]    res[0] + res[1] + res[2] + res[7] + res[8] + res[9]
        Values that never appear in the log stay at 0.
    """
    if target is None:
        # Older call style: eval_space names the wanted space, nothing is active yet.
        target, eval_space = eval_space, None
    current_space = eval_space

    logger = "model_best/%s" % log_file
    if not os.path.exists(os.path.join(runPath,logger)):
        logger = 'logging.log'

    res = np.zeros(15)
    with open(os.path.join(runPath,logger)) as f:
        for line in f:
            # A space header changes the active space.
            # The second pattern matches a misspelled header.
            if re.search("Space Evaluation", line) or re.search("Space Evalutaion", line):
                if re.search("Latent", line): current_space = 'latent'
                if re.search("Concept", line): current_space = 'concept'
                if re.search("Hybrid", line): current_space = 'hybrid'
            if target != current_space:
                continue
            # Direction headers select which half of `res` the next metrics fill.
            # `offset` is only bound once one of them has been read.
            if re.search("Text to Video", line): offset = 0
            if re.search("Video to text", line): offset = 7
            if re.search("meanr", line):
                values = line.split("[")[-1].split("]")[0].split(",")
                res[offset:offset+5] = [float(value) for value in values]
            if re.search("mAP", line):
                res[offset+5] = float(line.split()[2])
            if re.search("recall sum", line):
                res[offset+6] = float(line.split()[3])
    res[[5,12]] *= 100
    res[14] = res[0]+res[1]+res[2]+res[7]+res[8]+res[9]

    return(res)

# Sort key: the integer that follows the first "s_" in a run directory name
def sortKey(name):
    """Return the run number embedded in `name` (e.g. "runs_12" -> 12)."""
    pieces = name.split("s_")
    runNumber = int(pieces[1])
    return runNumber

# Read all runs of one experiment, keeping only the columns used in "TPAMI" style tables
def readExpResults(expPath, target=None):
    """Return an array with one row of 11 selected metrics per run found in `expPath`.

    Parameters
    ----------
    expPath : str
        Directory holding one sub-directory per run. Entries matching the
        pattern ".ipynb" are skipped and the rest are ordered with sortKey.
    target : str or None
        Evaluation space handed to readRunResults as its second argument.
        It defaults to None so that one-argument calls written for the
        earlier definition of this function keep working.

    Returns
    -------
    numpy.ndarray
        Columns [0, 1, 2, 3, 5, 7, 8, 9, 10, 12, 14] of every run's result
        vector, one row per run. Every row is also printed.
    """
    keptColumns = [0, 1, 2, 3, 5, 7, 8, 9, 10, 12, 14]
    runList = [runName for runName in os.listdir(expPath) if not re.search(".ipynb", runName)]
    runList = sorted(runList, key=sortKey)
    expRes = []
    for runName in runList:
        res = readRunResults(os.path.join(expPath,runName), target)
        expRes.append(res[keptColumns])
    expRes = np.array(expRes)
    with np.printoptions(linewidth=120, precision=2, floatmode="fixed"):
        for i, runName in enumerate(runList):
            print(runName, expRes[i])
    print("")
    return(expRes)

# LaTeX formatting
def latexAvgTableLine(expName, expRes):
    """Format one LaTeX table row holding the per-column average of `expRes`.

    With more than one run the row shows "<name> (<run count>)" and the column
    means with two decimals. Otherwise the values of the first row are shown
    with one decimal, columns 3 and 8 being printed as integers.
    """
    nRuns = expRes.shape[0]
    if nRuns > 1:
        row = np.average(expRes, axis=0)
        template = ("%s (%d) & %5.02f & %5.02f & %5.02f & %5.02f & %5.02f && %5.02f & %5.02f "
                    "& %5.02f & %5.02f & %5.02f & %6.02f \\\\")
        head = (expName, nRuns)
    else:
        row = expRes[0]
        template = ("%s & %5.01f & %.01f & %5.01f & %5d & %5.01f && %5.01f & %5.01f & %5.01f "
                    "& %5d & %5.01f & %6.01f \\\\")
        head = (expName,)
    cells = tuple(row[k] for k in range(11))
    return template % (head + cells)

def latexStdTableLine(expName, expRes):
    """Format one LaTeX table row with the sample standard deviation of each column.

    The deviation uses ddof=1. With a single run the row contains only dashes.
    """
    nRuns = expRes.shape[0]
    if nRuns <= 1:
        return "%s & - & - & - & - & - && - & - & - & - & - & - \\\\" % (expName)
    spread = np.std(expRes, axis=0, ddof=1)
    template = ("%s (%d) & %5.02f & %5.02f & %5.02f & %5.02f & %5.02f && %5.02f & %5.02f "
                "& %5.02f & %5.02f & %5.02f & %6.02f \\\\")
    cells = tuple(spread[k] for k in range(11))
    return template % ((expName, nRuns) + cells)

def latexTableLineZ(expName1, expRes1, expName2, expRes2, diffName):
    """Format two experiment averages and their z-score row as three LaTeX lines.

    For every column the score is
        z = (avg2 - avg1) / sqrt(std1**2 / n1 + std2**2 / n2)
    with sample standard deviations (ddof=1) and n1, n2 the run counts.
    Columns 3 and 8 are shown as "-" in the z row.

    Parameters
    ----------
    expName1, expName2 : str
        Row labels of the two experiments.
    expRes1, expRes2 : numpy.ndarray
        One row of 11 metrics per run.
    diffName : str
        Label of the z-score row.

    Returns
    -------
    str
        Three newline-separated table rows.

    Raises
    ------
    ValueError
        If either experiment has fewer than two runs, because the sample
        standard deviation is undefined. This used to surface as an
        UnboundLocalError on the return statement.
    """
    n1, n2 = expRes1.shape[0], expRes2.shape[0]
    if n1 < 2 or n2 < 2:
        raise ValueError("latexTableLineZ needs at least two runs per experiment "
                         "(got %d for %s and %d for %s)" % (n1, expName1, n2, expName2))
    avg1 = np.average(expRes1, axis=0)
    avg2 = np.average(expRes2, axis=0)
    std1 = np.std(expRes1, axis=0, ddof=1)
    std2 = np.std(expRes2, axis=0, ddof=1)
    z = (avg2-avg1)/np.sqrt(std1**2/n1+std2**2/n2)
    avgTemplate = ("%s (%d) & %5.02f & %5.02f & %5.02f & %5.02f & %5.02f && %5.02f & %5.02f "
                   "& %5.02f & %5.02f & %5.02f & %6.02f \\\\")
    zTemplate = ("%s      & %+5.01f & %+5.01f & %+5.01f &   -   & %+5.01f && %+5.01f & %+5.01f "
                 "& %+5.01f &   -   & %+5.01f & %+6.01f \\\\")
    lines = avgTemplate % ((expName1, n1) + tuple(avg1[k] for k in range(11)))
    lines = lines + "\n" + avgTemplate % ((expName2, n2) + tuple(avg2[k] for k in range(11)))
    # No z value is printed for columns 3 and 8.
    lines = lines + "\n" + zTemplate % (diffName, z[0], z[1], z[2], z[4], z[5], z[6], z[7], z[9], z[10])
    return(lines)

In [30]:
# Directory that holds one sub-directory per experiment (absolute, machine-specific path).
rootPath = "/home/mrim/deviv/irim/pytorch/danieljf24/VisualSearch/msrvtt10k/test_results"
# rootPath = "/home/mrim/deviv/irim/pytorch/danieljf24/VisualSearch/iacc.3/test_results"
# expList = ["hybrid_POS_CP", "hybrid_POS_treetagger", "hybrid_POS_spacy", 'hybrid_POS_WN', 'concept_TTtagged_captions']
# Experiment sub-directories of rootPath summarised by this cell, in table order.
expList = ['concept_512_NO_POS', 'concept_TTtagged_captions', 'concept_Spacytagged_captions', 'concept_WNtagged_captions', 'concept_WNtagged_captions_impacted', 'concept_WN_captions_impacted']
# Read multiple experiments
def multipleExpStats(rootPath, expList, target='concept'):
    """Build the LaTeX average and standard-deviation rows of several experiments.

    Parameters
    ----------
    rootPath : str
        Directory holding one sub-directory per experiment.
    expList : list of str
        Experiment sub-directory names; they are also used as row labels.
    target : str
        Evaluation space passed to readExpResults. It defaults to 'concept',
        the value that was previously hard-coded.

    Returns
    -------
    (list of str, list of str)
        Average rows and standard-deviation rows, in the order of `expList`.
        The experiment name, its path and its result array are printed as a
        side effect.
    """
    avgRes = []
    stdRes = []
    for expName in expList:
        expPath = os.path.join(rootPath,expName)
        print(expName)
        print(expPath)
        expRes = readExpResults(expPath, target)
        print(expRes)
        avgRes.append(latexAvgTableLine(expName, expRes))
        stdRes.append(latexStdTableLine(expName, expRes))
    return(avgRes,stdRes)

# Make statistics for multiple experiments
# (reads every run log of every experiment in expList; the per-run rows are printed while reading)
avgRes,stdRes = multipleExpStats(rootPath, expList)

# Output statistics for multiple experiments as latex lines
print("Averages:")
for line in avgRes: print(line)
print("")
print("Standard deviations:")
for line in stdRes: print(line)

concept_512_NO_POS
/home/mrim/deviv/irim/pytorch/danieljf24/VisualSearch/msrvtt10k/test_results/concept_512_NO_POS
runs_0 [  9.92  27.02  37.21  23.00  18.73  18.90  42.24  54.25   8.00   8.87 189.53]
runs_1 [  9.72  26.57  36.81  24.00  18.39  19.16  42.21  54.95   8.00   8.98 189.42]
runs_2 [  9.89  26.52  36.69  23.00  18.52  18.23  42.07  54.78   8.00   8.85 188.18]
runs_3 [  9.82  27.10  37.38  23.00  18.66  18.59  42.44  54.95   8.00   8.91 190.29]
runs_4 [ 10.03  27.18  37.45  22.00  18.80  19.26  41.24  52.81   9.00   8.90 187.97]
runs_5 [  9.73  26.45  36.57  24.00  18.39  18.33  41.87  54.28   8.00   8.89 187.24]
runs_6 [  9.93  27.15  37.48  23.00  18.76  17.89  40.10  52.91   9.00   8.73 185.46]
runs_7 [  9.90  26.81  37.17  23.00  18.64  19.23  42.07  54.82   9.00   8.89 190.00]
runs_8 [  9.64  26.12  36.43  24.00  18.24  18.43  42.98  54.45   8.00   9.02 188.04]
runs_9 [  9.83  26.94  37.01  24.00  18.59  17.66  41.24  53.98   9.00   8.77 186.66]

[[  9.923  27.018  37.20

In [38]:
# Directory that holds one sub-directory per experiment (absolute, machine-specific path).
rootPath = "/home/mrim/deviv/irim/pytorch/danieljf24/VisualSearch/msrvtt10k/test_results"
# expList = ["hybrid_POS_CP", "hybrid_POS_treetagger", "hybrid_POS_spacy", 'hybrid_POS_WN', 'concept_TTtagged_captions']
# Experiment sub-directories of rootPath summarised by this cell, in table order.
expList = ['hybrid_1536_512', 'hybrid_TTtagged_captions', 'hybrid_Spacytagged_captions', 'hybrid_WNtagged_captions']
# Read multiple experiments
def multipleExpStats(rootPath, expList, target='concept'):
    """Build the LaTeX average and standard-deviation rows of several experiments.

    Parameters
    ----------
    rootPath : str
        Directory holding one sub-directory per experiment.
    expList : list of str
        Experiment sub-directory names; they are also used as row labels.
    target : str
        Evaluation space passed to readExpResults. It defaults to 'concept',
        the value that was previously hard-coded.
        NOTE(review): this cell lists hybrid_* experiments yet reads the
        'concept' space; confirm that is the intended space.

    Returns
    -------
    (list of str, list of str)
        Average rows and standard-deviation rows, in the order of `expList`.
        The experiment name, its path and its result array are printed as a
        side effect.
    """
    avgRes = []
    stdRes = []
    for expName in expList:
        expPath = os.path.join(rootPath,expName)
        print(expName)
        print(expPath)
        expRes = readExpResults(expPath, target)
        print(expRes)
        avgRes.append(latexAvgTableLine(expName, expRes))
        stdRes.append(latexStdTableLine(expName, expRes))
    return(avgRes,stdRes)

# Make statistics for multiple experiments
# (reads every run log of every experiment in expList; the per-run rows are printed while reading)
avgRes,stdRes = multipleExpStats(rootPath, expList)

# Output statistics for multiple experiments as latex lines
print("Averages:")
for line in avgRes: print(line)
print("")
print("Standard deviations:")
for line in stdRes: print(line)

hybrid_1536_512
/home/mrim/deviv/irim/pytorch/danieljf24/VisualSearch/msrvtt10k/test_results/hybrid_1536_512
runs_0 [  9.80  26.70  36.90  23.00  18.54  19.00  41.50  53.10   9.00   8.78 187.00]
runs_1 [  9.90  26.80  37.00  23.00  18.59  18.00  42.60  55.00   8.00   8.95 189.30]
runs_2 [  9.80  26.10  36.00  25.00  18.18  19.50  40.80  54.10   9.00   9.04 186.30]
runs_3 [  9.80  26.60  36.60  24.00  18.41  18.20  43.40  55.40   8.00   8.94 190.00]
runs_4 [  9.80  26.10  36.40  24.00  18.28  18.50  43.10  54.80   8.00   8.95 188.70]
runs_5 [  9.80  26.70  36.70  23.00  18.50  19.30  42.30  53.80   8.00   8.96 188.60]
runs_6 [  9.40  25.60  35.80  25.00  17.82  18.00  41.80  55.30   8.00   9.03 185.90]
runs_7 [  9.50  25.70  35.80  25.00  17.99  19.10  42.20  55.20   8.00   9.02 187.50]
runs_8 [  9.70  26.40  36.70  24.00  18.36  18.20  42.00  54.60   8.00   8.94 187.60]
runs_9 [  9.40  25.70  35.80  25.00  17.91  18.70  42.30  55.20   8.00   8.87 187.10]

[[  9.8   26.7   36.9   23.   

In [None]:
# Directory that holds one sub-directory per experiment (absolute, machine-specific path).
rootPath = "/home/mrim/deviv/VisualSearch/msrvtt10k/testresults"
# Experiment sub-directories; the trailing comments give the label of each one (A.1 ... A.7).
expList = ["MR_DUAL", # A.1
           "MR_POS", # A.2
           "A_3_feat_name_resnext101-resnet152+pyresnet50-places365no_norm_", # A.3
           "No_POS_152_101_3D50_WN_no_norm", # A.4
           "A_5", # A.5
           "No_POS_152_101_50_3D50_WN_norm", # A.6
           "FULL_CONCAT_POS_wordnet", # A.7
           "FULL_CONCAT_POS", # None?
           "No_POS_152_101_50_3D50_WN_no_norm"]

# Read multiple experiments
def multipleExpStats(rootPath, expList, target=None):
    """Build the LaTeX average and standard-deviation rows of several experiments.

    Parameters
    ----------
    rootPath : str
        Directory holding one sub-directory per experiment.
    expList : list of str
        Experiment sub-directory names; they are also used as row labels.
    target : str or None
        Evaluation space passed on to readExpResults. The readExpResults
        defined earlier in this notebook takes it as a second argument, which
        the former one-argument call did not supply.

    Returns
    -------
    (list of str, list of str)
        Average rows and standard-deviation rows, in the order of `expList`.
        Each experiment name is printed as a side effect.
    """
    avgRes = []
    stdRes = []
    for expName in expList:
        print(expName)
        expRes = readExpResults(os.path.join(rootPath,expName), target)
        avgRes.append(latexAvgTableLine(expName, expRes))
        stdRes.append(latexStdTableLine(expName, expRes))
    return(avgRes,stdRes)

# Make statistics for multiple experiments
# (disabled: with the call commented out, this cell only prints the two headings below)
# avgRes,stdRes = multipleExpStats(rootPath, expList)

# Output statistics for multiple experiments as latex lines
print("Averages:")
# for line in avgRes: print(line)
print("")
print("Standard deviations:")
# for line in stdRes: print(line)

In [None]:
# Path components joined by the sequence readers below:
# <IRIM>/pytorch/<FRAME>/VisualSearchResults/msrvtt10k/test_results/<experiment>
FRAME="dual_encoding_experiments"
IRIM="/home/mrim/tools/irim"

# Read all runs of an experiment in increasing dimension order ("TPAMI" style columns only)
def readExpResultsSeqCon(expName="concept_dims_hybrid", eval_space="hybrid"):
    """Read every run of `expName`, ordered by the integer in its directory name.

    Run directories are expected to be named "<prefix>_<dim>[_...]"; the
    second "_"-separated field is parsed as the dimension.

    Parameters
    ----------
    expName : str
        Experiment directory below
        <IRIM>/pytorch/<FRAME>/VisualSearchResults/msrvtt10k/test_results.
    eval_space : str
        Evaluation space whose metrics are read.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The sorted dimensions and one row of 11 selected metrics per run.
    """
    keptColumns = [0, 1, 2, 3, 5, 7, 8, 9, 10, 12, 14]
    expPath = os.path.join(IRIM,"pytorch",FRAME,"VisualSearchResults","msrvtt10k","test_results", expName)
    runList = np.array([runName for runName in os.listdir(expPath)])
    runConceptDims = np.array([int(runName.split("_")[1]) for runName in runList], dtype=int)
    runIndexes = np.argsort(runConceptDims)
    runList, runConceptDims = runList[runIndexes], runConceptDims[runIndexes]
    runDims, runResults = [], []
    for i, runConceptDim in enumerate(runConceptDims):
        runPath = os.path.join(expPath,runList[i])
        runDims.append(runConceptDim)
        # The wanted space is passed positionally: it is the second parameter of
        # both readRunResults signatures defined in this notebook, whereas the
        # keyword form left the newer signature's required `target` unset.
        runResults.append(readRunResults(runPath, eval_space)[keptColumns])
    return(np.array(runDims), np.array(runResults))

# Read all runs of an experiment in increasing dimension order ("TPAMI" style columns only)
def readExpResultsSeq(expName="concept_dims_hybrid", eval_space="hybrid", log_file="logging.log"):
    """Read every run of `expName`, ordered by the integer in its directory name.

    Run directories are expected to be named "<prefix>_<dim>[_...]"; the
    second "_"-separated field is parsed as the dimension.

    Parameters
    ----------
    expName : str
        Experiment directory below
        <IRIM>/pytorch/<FRAME>/VisualSearchResults/msrvtt10k/test_results.
    eval_space : str
        Evaluation space whose metrics are read.
    log_file : str
        Log file name forwarded to readRunResults.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The sorted dimensions and one row of 11 selected metrics per run.
    """
    keptColumns = [0, 1, 2, 3, 5, 7, 8, 9, 10, 12, 14]
    expPath = os.path.join(IRIM,"pytorch",FRAME,"VisualSearchResults","msrvtt10k","test_results", expName)
    runList = np.array([runName for runName in os.listdir(expPath)])
    runDims = np.array([int(runName.split("_")[1]) for runName in runList], dtype=int)
    runIndexes = np.argsort(runDims)
    runList, runDims = runList[runIndexes], runDims[runIndexes]
    runResults = []
    for i in range(len(runList)):
        runPath = os.path.join(expPath,runList[i])
        # The wanted space is passed positionally: it is the second parameter of
        # both readRunResults signatures defined in this notebook, whereas the
        # keyword form left the newer signature's required `target` unset.
        runResult = readRunResults(runPath, eval_space, log_file=log_file)
        runResults.append(runResult[keptColumns])
    return(runDims, np.array(runResults))

# Read all sequences of runs of an experiment ("TPAMI" style columns only)
def readMultiExpResultsSeq(expName="concept_dims_hybrid", eval_space="hybrid"):
    """Read, for every run of `expName`, one result row per "logging_<dim>.log" file.

    Runs are ordered by the integer in their directory name ("<prefix>_<dim>").
    Inside each run's "model_best" directory, the files whose name contains
    "logging_" but not "_save" are ordered by the integer in their name.

    Parameters
    ----------
    expName : str
        Experiment directory below
        <IRIM>/pytorch/<FRAME>/VisualSearchResults/msrvtt10k/test_results.
    eval_space : str
        Evaluation space whose metrics are read.

    Returns
    -------
    (list of numpy.ndarray, list of numpy.ndarray)
        Per run: the sorted log dimensions, and one row of 11 selected metrics
        per log file.
    """
    keptColumns = [0, 1, 2, 3, 5, 7, 8, 9, 10, 12, 14]
    expPath = os.path.join(IRIM,"pytorch",FRAME,"VisualSearchResults","msrvtt10k","test_results", expName)
    runList = np.array([runName for runName in os.listdir(expPath)])
    # The directory dimensions are only used to order the runs.
    runOrder = np.argsort(np.array([int(runName.split("_")[1]) for runName in runList], dtype=int))
    runList = runList[runOrder]
    runDims, runResults = [], []
    for i in range(len(runList)):
        runPath = os.path.join(expPath,runList[i])
        logPath = os.path.join(expPath, runList[i], "model_best")
        logList = [logName for logName in os.listdir(logPath)
                   if "logging_" in logName and not "_save" in logName]
        logList = np.array(logList)
        logDims = np.array([int(logName.split("_")[1].split(".")[0]) for logName in logList], dtype=int)
        logIndexes = np.argsort(logDims)
        logList, logDims = logList[logIndexes], logDims[logIndexes]
        logResults = []
        for j in range(len(logList)):
            # The wanted space is passed positionally: it is the second parameter
            # of both readRunResults signatures defined in this notebook, whereas
            # the keyword form left the newer signature's required `target` unset.
            logResult = readRunResults(runPath, eval_space, log_file="logging_%d.log" % logDims[j])
            logResults.append(logResult[keptColumns])
        runDims.append(logDims)
        runResults.append(np.array(logResults))
    return(runDims, runResults)

In [None]:
def runResultsToRunPlots6x2(runResults):
    """Rearrange per-run result rows into a (6, 2, n_runs) array of plot series.

    The two columns of every row hold, in this order, the series built from
    result columns 0-4 and from result columns 5-9. Rows 0-2 copy single
    columns, row 3 sums three columns, row 4 copies columns 4 and 9, and
    row 5 holds column 10 and the mean of the two row-4 series.
    """
    grid = np.empty((6, 2, runResults.shape[0]))
    for row, (leftCol, rightCol) in enumerate([(0, 5), (1, 6), (2, 7)]):
        grid[row, 0] = runResults[:, leftCol]
        grid[row, 1] = runResults[:, rightCol]
    grid[3, 0] = runResults[:, 0:3].sum(axis=1)
    grid[3, 1] = runResults[:, 5:8].sum(axis=1)
    grid[4, 0] = runResults[:, 4]
    grid[4, 1] = runResults[:, 9]
    grid[5, 0] = runResults[:, 10]
    grid[5, 1] = grid[4].sum(axis=0) / 2
    return grid

def runResultsToRunPlots2x3(runResults):
    """Rearrange per-run result rows into a (2, 3, n_runs) array of plot series.

    Row 0 holds the sum of columns 0-2, the sum of columns 5-7 and column 10.
    Row 1 holds column 4, column 9 and the mean of those two.
    """
    grid = np.empty((2, 3, runResults.shape[0]))
    grid[0] = [runResults[:, 0:3].sum(axis=1),
               runResults[:, 5:8].sum(axis=1),
               runResults[:, 10]]
    grid[1, 0:2] = [runResults[:, 4], runResults[:, 9]]
    grid[1, 2] = grid[1, 0:2].sum(axis=0) / 2
    return grid

def avgStdPlots(runPlots):
    """Return the mean and the sample standard deviation (ddof=1) along axis 2."""
    lastAxis = 2
    means = np.average(runPlots, axis=lastAxis)
    deviations = np.std(runPlots, axis=lastAxis, ddof=1)
    return means, deviations

# Panel titles, indexed [row][column], for the 6x2 and the 2x3 layouts.
titles6x2 = [["Text to Video R@1", "Video to Text R@1"], ["Text to Video R@5", "Video to Text R@5"],
             ["Text to Video R@10", "Video to Text R@10"], ["Text to Video Sum R", "Video to Text Sum R"],
             ["Text to Video MAP", "Video to Text MAP"], ["All Sum R", "All Average MAP"]]
titles2x3 = [["Text to Video Sum R", "Video to Text Sum R", "All Sum R"],
             ["Text to Video MAP", "Video to Text MAP", "All Average MAP"]]
# One [mean-line colour, standard-deviation band colour] pair per zone, in zone order.
ascolors = [['#00FF00', '#9FFF9F'], ['#FF0000', '#FF9F9F'],['#00FFFF', '#9FFFFF'], ['#FF7F00', '#FFBF9F']]
# Legend label of each zone, in zone order (at most four zones have a colour and a label).
aslabels = ["Latent stable", "Optimal TTV", "Optimal VTT", "Optimal all"]

# Caches filled by plotExpResults: they keep the statistics of the first zone seen for each
# layout and are reused by every later call, so results depend on cell execution order.
avgStdLatentPlots2x3 = [] # Global variable for the "latent-stable" averages and standard deviations
avgStdLatentPlots6x2 = [] # Initialized on first call with non-empty zones
        
# Tick formatter that prints x values with the general ('g') format.
formatter = FuncFormatter(lambda y, _: '{:.16g}'.format(y))

def plotExpResults(runDims, runResults, zones, first=0, last=0, bottom=None, pdf=None, layout="2x3", loc=None):
    """Plot one sequence of runs on a grid of log-x panels, with averaged zones overlaid.

    Parameters
    ----------
    runDims : numpy.ndarray
        x value of every run.
    runResults : numpy.ndarray
        One row of metrics per run, in the column order expected by
        runResultsToRunPlots2x3 / runResultsToRunPlots6x2.
    zones : list of [start, stop]
        Index ranges into the unsliced runs. A stop of 0 in the first zone
        means "up to the last run". The list is copied, so the caller's object
        is left untouched; an empty list draws no zone.
        The first zone is special: its mean and standard deviation are computed
        only the first time a layout is plotted with non-empty zones, stored in
        avgStdLatentPlots2x3 / avgStdLatentPlots6x2, and reused by every later
        call, drawn over that call's own zones[0] range.
    first, last : int
        Slice of runs that is drawn; last == 0 means "to the end".
    bottom : float or None
        Lower y limit applied to every panel when given.
    pdf : str or None
        When given, the figure is saved as "figures/" + pdf + "_<layout>.pdf".
    layout : {"2x3", "6x2"}
        Panel grid.
    loc : str or None
        Legend location; no legend is drawn when None.

    Raises
    ------
    ValueError
        If `layout` is not one of the two supported values.
    """
    # Private copy: resolving the "stop == 0" shortcut must not edit the caller's list.
    zones = [list(zone) for zone in zones]
    if len(zones) > 0 and zones[0][1] == 0: zones[0][1] = runDims.shape[0]
    if layout == "2x3":
        h, w, lh, lw = 2, 3, 14, 6
        titles, ext = titles2x3, "_2x3.pdf"
        runPlots = runResultsToRunPlots2x3(runResults)
        latentCache = avgStdLatentPlots2x3
    elif layout == "6x2":
        h, w, lh, lw = 6, 2, 14, 21
        titles, ext = titles6x2, "_6x2.pdf"
        runPlots = runResultsToRunPlots6x2(runResults)
        latentCache = avgStdLatentPlots6x2
    else:
        raise ValueError('layout must be "2x3" or "6x2", got %r' % (layout,))
    # The first call with zones fills the cache; later calls reuse those statistics.
    if len(latentCache) == 0 and len(zones) > 0:
        latentCache.append(avgStdPlots(runPlots[:,:,zones[0][0]:zones[0][1]]))
    if len(latentCache) == 0 or len(zones) == 0: avgStds = []
    else: avgStds = [[latentCache[0][0], latentCache[0][1], zones[0][0], zones[0][1]]]
    for k in range(1, len(zones)):
        avg, std = avgStdPlots(runPlots[:,:,zones[k][0]:zones[k][1]])
        avgStds.append([avg, std, zones[k][0], zones[k][1]])
    if last == 0: last = runDims.shape[0]
    runDims, runPlots = runDims[first:last], runPlots[:,:,first:last]
    fig, axes = plt.subplots(h, w, figsize=(lh,lw))
    fig.tight_layout(h_pad = 3.0, w_pad = 1.5)
    for k, avgStd in enumerate(avgStds):
        [avgPlots, stdPlots, avgStdFirst, avgStdLast] = avgStd
        # Zone bounds are indexes into the unsliced runs, hence the shift by `first`.
        print(aslabels[k], [runDims[avgStdFirst-first], runDims[avgStdLast-first-1]])
    for i in range(h):
        for j in range(w):
            axes[i][j].set_xscale("log")
            axes[i][j].xaxis.set_major_formatter(formatter)
            axes[i][j].plot(runDims, runPlots[i][j])
            axes[i][j].set_title(titles[i][j])
            if bottom is not None: axes[i][j].set_ylim(bottom=bottom)
            for k, avgStd in enumerate(avgStds):
                [avgPlots, stdPlots, avgStdFirst, avgStdLast] = avgStd
                zoneEnds = runDims[[avgStdFirst-first,avgStdLast-first-1]]
                axes[i][j].plot(zoneEnds,[avgPlots[i][j], avgPlots[i][j]],
                                color=ascolors[k][0], label=aslabels[k])
                axes[i][j].fill_between(zoneEnds,
                                        [avgPlots[i][j]-stdPlots[i][j], avgPlots[i][j]-stdPlots[i][j]],
                                        [avgPlots[i][j]+stdPlots[i][j], avgPlots[i][j]+stdPlots[i][j]],
                                        facecolor=ascolors[k][1], linewidth=0)
                if loc is not None: axes[i][j].legend(loc=loc)
    if pdf is not None: plt.savefig("figures/"+ pdf + ext, format="pdf", bbox_inches="tight")
    
def plotMultiExpResults(runDims, runResults, first=0, last=0, bottom=None, pdf=None, layout="2x3", labels=None, loc=None):
    """Plot several sequences of runs together on a grid of log-x panels.

    Parameters
    ----------
    runDims : list of numpy.ndarray
        x values of every sequence.
    runResults : list of numpy.ndarray
        Per sequence, one row of metrics per run, in the column order expected
        by runResultsToRunPlots2x3 / runResultsToRunPlots6x2.
    first, last : int
        Slice applied to every sequence; last == 0 means "to the end of that
        sequence". The end is resolved per sequence: it used to be taken from
        the first sequence and reused, which truncated longer ones.
    bottom : float or None
        Lower y limit applied to every panel when given.
    pdf : str or None
        When given, the figure is saved as "figures/" + pdf + "_<layout>.pdf".
    layout : {"2x3", "6x2"}
        Panel grid.
    labels : list of str or None
        One legend label per sequence.
    loc : str or None
        Legend location; no legend is drawn when None.

    Raises
    ------
    ValueError
        If `layout` is not one of the two supported values.

    The input lists are not modified.
    """
    if layout == "2x3":
        h, w, lh, lw = 2, 3, 14, 6
        titles, ext = titles2x3, "_2x3.pdf"
        runPlots = [runResultsToRunPlots2x3(runResult) for runResult in runResults]
    elif layout == "6x2":
        h, w, lh, lw = 6, 2, 14, 21
        titles, ext = titles6x2, "_6x2.pdf"
        runPlots = [runResultsToRunPlots6x2(runResult) for runResult in runResults]
    else:
        raise ValueError('layout must be "2x3" or "6x2", got %r' % (layout,))
    # Slice into fresh lists so the caller's runDims list keeps its full arrays.
    shownDims, shownPlots = [], []
    for k in range(len(runDims)):
        stop = runDims[k].shape[0] if last == 0 else last
        shownDims.append(runDims[k][first:stop])
        shownPlots.append(runPlots[k][:,:,first:stop])
    fig, axes = plt.subplots(h, w, figsize=(lh,lw))
    fig.tight_layout(h_pad = 3.0, w_pad = 1.5)
    for i in range(h):
        for j in range(w):
            axes[i][j].set_xscale("log")
            axes[i][j].xaxis.set_major_formatter(formatter)
            for k in range(len(shownDims)):
                if labels is None: axes[i][j].plot(shownDims[k], shownPlots[k][i][j])
                else: axes[i][j].plot(shownDims[k], shownPlots[k][i][j], label=labels[k])
            axes[i][j].set_title(titles[i][j])
            if bottom is not None: axes[i][j].set_ylim(bottom=bottom)
            if loc is not None: axes[i][j].legend(loc=loc)
    if pdf is not None: plt.savefig("figures/"+ pdf + ext, format="pdf", bbox_inches="tight")

In [None]:
# One sequence per run directory of the "latent" experiment, one point per logging_<dim>.log file.
runDims, runResults = readMultiExpResultsSeq(expName="latent", eval_space="latent")
# Drop the first sequence and plot the remaining ones in reverse order.
runDims, runResults = list(reversed(runDims[1:])), list(reversed(runResults[1:]))
# NOTE(review): the layout suffix is appended to `pdf`, so this writes
# figures/PCA-perf.pdf_2x3.pdf - confirm that the doubled extension is intended.
plotMultiExpResults(runDims, runResults, first=6, pdf="PCA-perf.pdf")

### Results for latent training and latent decoding

Displays the original metrics as a function of the number of latent dimensions, full curve.<br>
Displays the mean and standard deviation in the stable region.

In [None]:
# Index range of the zone averaged below; `first` and `last` are reused by later cells.
first, last = 45,106
runLatentDims, runLatentResults = readExpResultsSeq("latent_dims", eval_space="latent")
zones = [[first, last]]
# First 6x2 call with zones: this fills the cache avgStdLatentPlots6x2.
plotExpResults(runLatentDims, runLatentResults, zones, pdf="latent_full", loc='lower right', layout="6x2")

In [None]:
# First 2x3 call with zones: this fills the cache avgStdLatentPlots2x3 that all later 2x3 plots reuse.
plotExpResults(runLatentDims, runLatentResults, zones, pdf="latent_full", loc='lower right')

In [None]:
# Same data as above, drawn from run index 15 onwards.
zones = [[first, last]]
plotExpResults(runLatentDims, runLatentResults, zones, pdf="latent", first=15, loc='lower right')

### Results for latent training and latent decoding

Displays the original metrics as a function of the number of latent dimensions, zoom on the stable region.<br>
Displays the mean and standard deviation in the stable region.

In [None]:
# Only the runs of the averaged zone itself are drawn.
zones = [[first, last]]
plotExpResults(runLatentDims, runLatentResults, zones, pdf="latent_zoom", first=first, last=last, loc='lower right')

### Results for hybrid training and hybrid decoding

Displays the original metrics as a function of the number of concept dimensions (1536 latent dimensions).<br>
Displays the mean and standard deviation for the latent-only case in its stable region (green).<br>
Displays the mean and standard deviation for the TTV and VTT optimal regions (red and cyan).

In [None]:
runHybridDims, runHybridResults = readExpResultsSeq("concept_dims_hybrid", eval_space="hybrid")
# zones[0] only gives the x range over which the cached statistics are drawn (stop 0 = last run);
# the other zones are averaged over this cell's data.
zones = [[9, 0], [30,49], [60,79]]
plotExpResults(runHybridDims, runHybridResults, zones, pdf="hybrid_1536", first=zones[0][0], loc='lower right')

### Results for concept-only training and concept-only decoding in original conditions <br> (jaccard similarity, no xavier initialization, and no dropout)

Displays the original metrics as a function of the number of concept dimensions.<br>
Displays the mean and standard deviation for the latent-only case in its stable region (green).<br>
Displays the mean and standard deviation for the TTV and VTT optimal regions (red and cyan) and for the overall optimal region (orange).

In [None]:
runConceptDims, runConceptResults = readExpResultsSeq("concept_dims", eval_space="concept")
# zones[0] only gives the x range over which the cached statistics are drawn (stop 0 = last run);
# the three other zones are averaged over this cell's data.
zones = [[25, 0], [50, 62], [70, 104], [54, 65]]
plotExpResults(runConceptDims, runConceptResults, zones, pdf="concept", first=zones[0][0], loc='lower left')

### Results for concept-only training and concept-only decoding in "latent" conditions <br> (cosine similarity, with xavier initialization, and with dropout)

Displays the original metrics as a function of the number of concept dimensions.<br>
Displays the mean and standard deviation for the latent-only case in its stable region (green).<br>
Displays the mean and standard deviation for the TTV and VTT optimal regions (red and cyan) and for the overall optimal region (orange).

In [None]:
runConceptCXDDims, runConceptCXDResults = readExpResultsSeq("concept_cxd_dims", eval_space="concept")
# zones[0] only gives the x range over which the cached statistics are drawn (stop 0 = last run);
# the three other zones are averaged over this cell's data.
zones = [[25, 0], [42, 55], [61, 104], [52, 59]]
plotExpResults(runConceptCXDDims, runConceptCXDResults, zones, pdf="concept_cxd", first=zones[0][0], loc='lower center')

### Results for hybrid training and concept-only decoding

Displays the original metrics as a function of the number of concept dimensions (1536 latent dimensions).<br>
Displays the mean and standard deviation for the latent-only case in its stable region (green).<br>
Displays the mean and standard deviation for the TTV and VTT optimal regions (red and cyan) and for the overall optimal region (orange).

In [None]:
# Same experiment directory as the hybrid plot, read in the concept evaluation space.
runConceptHybridDims, runConceptHybridResults = readExpResultsSeq("concept_dims_hybrid", eval_space="concept")
# zones[0] only gives the x range over which the cached statistics are drawn (stop 0 = last run);
# the three other zones are averaged over this cell's data.
zones = [[25, 0], [50, 63], [70, 104], [70, 87]]
plotExpResults(runConceptHybridDims, runConceptHybridResults, zones, pdf="concept_1536", first=zones[0][0],
               loc='lower right')

### Results for hybrid training and latent-only decoding
The co-training really helps: much better than with latent-only training, especially for TTV!

Displays the original metrics as a function of the number of concept dimensions during the co-training.<br>
Displays the mean and standard deviation for the latent-only case in its stable region (green).<br>
Displays the mean and standard deviation for the TTV and VTT optimal regions (red and cyan).

In [None]:
# Same experiment directory as the hybrid plot, read in the latent evaluation space.
runLatentHybridDims, runLatentHybridResults = readExpResultsSeq("concept_dims_hybrid", eval_space="latent")
# zones[0] = [0, 0] draws the cached statistics over the whole x range.
zones = [[0, 0], [21, 50], [17, 46]]
plotExpResults(runLatentHybridDims, runLatentHybridResults, zones, pdf="latent_1536", first=zones[0][0], loc='lower left')

### Results for concept-only training and concept-only decoding with spacy POS tagging in original conditions <br> (jaccard similarity, no xavier initialization, and no dropout)

Displays the original metrics as a function of the number of concept dimensions.<br>
Displays the mean and standard deviation for the latent-only case in its stable region (green).<br>
Displays the mean and standard deviation for the TTV and VTT optimal regions (red and cyan) and for the overall optimal region (orange).

In [None]:
runConceptSpacyDims, runConceptSpacyResults = readExpResultsSeq("concept_dims_spacy", eval_space="concept")
# zones[0] only gives the x range over which the cached statistics are drawn (stop 0 = last run);
# the three other zones are averaged over this cell's data.
zones = [[10, 0], [34, 46], [54, 90], [40, 48]]
plotExpResults(runConceptSpacyDims, runConceptSpacyResults, zones, pdf="concept_spacy", first=zones[0][0], loc='lower left')

In [None]:
# Overlay of all six sequences read by the cells above (requires those cells to have run).
# NOTE(review): the layout suffix is appended to `pdf`, so this writes
# figures/multi_all.pdf_2x3.pdf - confirm that the doubled extension is intended.
plotMultiExpResults([runConceptDims, runConceptCXDDims, runConceptHybridDims, runLatentDims,
                     runLatentHybridDims, runHybridDims],
                    [runConceptResults, runConceptCXDResults, runConceptHybridResults, runLatentResults,
                     runLatentHybridResults, runHybridResults],
                    first=8, pdf="multi_all.pdf",
                    labels=["Concept", "ConceptCXD", "ConceptHybrid", "Latent",
                            "LatentHybrid", "Hybrid"], loc='lower center')

In [None]:
# Concept-only sequences against the latent-only sequence (requires the cells above to have run).
# NOTE(review): the output file name ends in ".pdf_2x3.pdf" because the layout suffix is appended to `pdf`.
plotMultiExpResults([runConceptDims, runConceptCXDDims, runLatentDims],
                    [runConceptResults, runConceptCXDResults, runLatentResults],
                    first=15, pdf="multi_concept_latent.pdf",
                    labels=["Concept", "ConceptCXD", "Latent"], loc='lower right')

In [None]:
# The three concept-space sequences together (requires the cells above to have run).
# NOTE(review): the output file name ends in ".pdf_2x3.pdf" because the layout suffix is appended to `pdf`.
plotMultiExpResults([runConceptDims, runConceptCXDDims, runConceptHybridDims],
                    [runConceptResults, runConceptCXDResults, runConceptHybridResults],
                    first=15, pdf="multi_concept.pdf",
                    labels=["Concept", "ConceptCXD", "ConceptHybrid"], loc='lower right')

In [None]:
# Hybrid-training sequences against the latent-only and concept-only ones (requires the cells above to have run).
# NOTE(review): the output file name ends in ".pdf_2x3.pdf" because the layout suffix is appended to `pdf`.
plotMultiExpResults([runConceptHybridDims, runLatentHybridDims, runHybridDims, runLatentDims, runConceptDims],
                   [runConceptHybridResults, runLatentHybridResults, runHybridResults, runLatentResults, runConceptResults],
                    first=11, pdf="multi_hybrid.pdf",
                    labels=["ConceptHybrid", "LatentHybrid", "Hybrid", "Latent", "Concept"], loc='lower center')

In [None]:
# Concept-only sequence against its spacy variant (requires the cells above to have run).
# NOTE(review): the output file name ends in ".pdf_2x3.pdf" because the layout suffix is appended to `pdf`.
plotMultiExpResults([runConceptDims, runConceptSpacyDims],
                    [runConceptResults, runConceptSpacyResults],
                    first=9, pdf="multi_spacy.pdf",
                    labels=["Concept", "ConceptSpacy"], loc='lower right')

In [None]:
# Reference for the Z-test: http://homework.uoregon.edu/pub/class/es202/ztest.html

# --- Load per-run results, evaluated in the latent space ---
_, runLatent2048Results = readExpResultsSeq(expName="latent_2048", eval_space="latent")
_, runLatent2048jacResults = readExpResultsSeq(expName="latent_2048_jac", eval_space="latent")
_, runLatent2048jndResults = readExpResultsSeq(expName="latent_2048_jnd", eval_space="latent")
_, runLatent2048jnnResults = readExpResultsSeq(expName="latent_2048_jnn", eval_space="latent")
_, runLatent2048ndpResults = readExpResultsSeq(expName="latent_2048_ndp", eval_space="latent")
_, runLatent2048nxiResults = readExpResultsSeq(expName="latent_2048_nxi", eval_space="latent")
_, runLatent1536hybResults = readExpResultsSeq(expName="hybrid_1536_512", eval_space="latent")
_, runLatent1536hybcxResults = readExpResultsSeq(expName="hybrid_1536_512_cxi", eval_space="latent")
_, runLatent512cxdResults = readExpResultsSeq(expName="latent_512_cxd", eval_space="latent")
_, runLatent512jxdResults = readExpResultsSeq(expName="latent_512_jxd", eval_space="latent")

# --- Load per-run results, evaluated in the concept space ---
_, runConcept512Results = readExpResultsSeq(expName="concept_512", eval_space="concept")
_, runConcept512cosResults = readExpResultsSeq(expName="concept_512_cos", eval_space="concept")
_, runConcept512cxiResults = readExpResultsSeq(expName="concept_512_cxi", eval_space="concept")
_, runConcept256cxdResults = readExpResultsSeq(expName="concept_256_cxd", eval_space="concept")
_, runConcept512cxdResults = readExpResultsSeq(expName="concept_512_cxd", eval_space="concept")
_, runConcept256xiResults = readExpResultsSeq(expName="concept_256_xi", eval_space="concept")
_, runConcept512cxdnResults = readExpResultsSeq(expName="concept_512_cxdn", eval_space="concept")
_, runConcept512cxnnResults = readExpResultsSeq(expName="concept_512_cxnn", eval_space="concept")
_, runConcept512jxdResults = readExpResultsSeq(expName="concept_512_jxd", eval_space="concept")
_, runConcept512jxdnResults = readExpResultsSeq(expName="concept_512_jxdn", eval_space="concept")
_, runConcept512xiResults = readExpResultsSeq(expName="concept_512_xi", eval_space="concept")
_, runConcept512jxnnResults = readExpResultsSeq(expName="concept_512_jxnn", eval_space="concept")
_, runConcept512hybResults = readExpResultsSeq(expName="hybrid_1536_512", eval_space="concept")
_, runConcept512hybcxResults = readExpResultsSeq(expName="hybrid_1536_512_cxi", eval_space="concept")

# --- Load per-run results, evaluated in the hybrid space ---
_, runHybrid1536512Results = readExpResultsSeq(expName="hybrid_1536_512", eval_space="hybrid")
_, runHybrid1536512cosResults = readExpResultsSeq(expName="hybrid_1536_512_cos", eval_space="hybrid")
_, runHybrid1536512xiResults = readExpResultsSeq(expName="hybrid_1536_512_xi", eval_space="hybrid")
_, runHybrid1536512xinResults = readExpResultsSeq(expName="hybrid_1536_512_xin", eval_space="hybrid")
_, runHybrid1536512cxResults = readExpResultsSeq(expName="hybrid_1536_512_cxi", eval_space="hybrid")
_, runHybrid1536512jxnResults = readExpResultsSeq(expName="hybrid_1536_512_jxn", eval_space="hybrid")
_, runHybrid1536512jxdResults = readExpResultsSeq(expName="hybrid_1536_512_jxd", eval_space="hybrid")
_, runHybrid1536512jcxnResults = readExpResultsSeq(expName="hybrid_1536_512_jcxn", eval_space="hybrid")
_, runHybrid1536512noclassResults = readExpResultsSeq(expName="hybrid_1536_512_noclass", eval_space="hybrid")
_, runHybrid1536512noclxiResults = readExpResultsSeq(expName="hybrid_1536_512_nocl_xi", eval_space="hybrid")
_, runHybrid1536512noclxicosResults = readExpResultsSeq(expName="hybrid_1536_512_nocl_xi_cos", eval_space="hybrid")

# Single source of truth for both tables. The average and std tables used to be
# two hand-maintained print lists that had drifted apart: the std table was
# missing the "Latent 1536 hyb cx " row and spelled one label differently.
summaryTableRows = [
    ("Latent 2048 cxd*   ", runLatent2048Results),
    ("Latent 2048 cxn    ", runLatent2048ndpResults),
    ("Latent 2048 cnd    ", runLatent2048nxiResults),
    ("Latent 2048 jxd    ", runLatent2048jacResults),
    ("Latent 2048 jxn    ", runLatent2048jndResults),
    ("Latent 2048 jnn    ", runLatent2048jnnResults),
    ("Latent 1536 hyb    ", runLatent1536hybResults),
    ("Latent 1536 hyb cx ", runLatent1536hybcxResults),
    ("Latent 512 cxdn    ", runLatent512cxdResults),
    ("Latent 512 jxdn    ", runLatent512jxdResults),
    ("Concept 256 cxd    ", runConcept256cxdResults),
    ("Concept 256 jxn    ", runConcept256xiResults),
    ("Concept 512 cxd    ", runConcept512cxdResults),
    ("Concept 512 cxdn   ", runConcept512cxdnResults),
    ("Concept 512 cxnn   ", runConcept512cxnnResults),
    ("Concept 512 cxn    ", runConcept512cxiResults),
    ("Concept 512 cnn    ", runConcept512cosResults),
    ("Concept 512 jxd    ", runConcept512jxdResults),
    ("Concept 512 jxdn   ", runConcept512jxdnResults),
    ("Concept 512 jxn    ", runConcept512xiResults),
    ("Concept 512 jxnn   ", runConcept512jxnnResults),
    ("Concept 512 jnn*   ", runConcept512Results),
    ("Concept 512 hyb    ", runConcept512hybResults),
    ("Concept 512 hyb cx ", runConcept512hybcxResults),
    ("Hybrid 1536 512*   ", runHybrid1536512Results),
    ("Hybrid 1536 512 co ", runHybrid1536512cosResults),
    ("Hybrid 1536 512 xi ", runHybrid1536512xiResults),
    ("Hybrid 1536 512 xin", runHybrid1536512xinResults),
    ("Hybrid 1536 512 cx ", runHybrid1536512cxResults),
    ("Hybrid 1536 512 jxn", runHybrid1536512jxnResults),
    ("Hybrid 1536 512 jxd", runHybrid1536512jxdResults),
    ("Hybrid 1536 512jcxn", runHybrid1536512jcxnResults),
    ("Hybrid 1536 512   n", runHybrid1536512noclassResults),
    ("Hybrid 1536 512 x n", runHybrid1536512noclxiResults),
    ("Hybrid 1536 512cx n", runHybrid1536512noclxicosResults),
]

# Table of averages, one LaTeX line per experiment.
for rowLabel, rowResults in summaryTableRows:
    print(latexAvgTableLine(rowLabel, rowResults))
print('')
# Table of standard deviations, same rows in the same order.
for rowLabel, rowResults in summaryTableRows:
    print(latexStdTableLine(rowLabel, rowResults))

In [None]:
# os.system('color')
from termcolor import colored
# print(colored('hello', 'red'), colored('world', 'green'))

# Consistency check
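Latent should be equivalent to Concept- (the concept model with the weight of the classification loss set to zero), so small Z values are expected. <br>
|Z| > 1.65 ~ p < 0.05 (one-tailed critical value) <br>
|Z| > 2.33 ~ p < 0.01 (one-tailed critical value)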

In [None]:
# Latent vs. Concept- at 512 dimensions, once per similarity (cosine, then Jaccard).
consistencyComparisons = [
    ("Latent 512 cxd     ", runLatent512cxdResults,
     "Concept 512 cxd-   ", runConcept512cxdnResults,
     "Lat. vs Con- cos   "),
    ("Latent 512 jxd     ", runLatent512jxdResults,
     "Concept 512 jxd-   ", runConcept512jxdnResults,
     "Lat. vs Con- jac   "),
]
for comparisonIndex, comparisonArgs in enumerate(consistencyComparisons):
    if comparisonIndex > 0:
        print('')  # blank line between consecutive comparisons
    print(latexTableLineZ(*comparisonArgs))

# Dropout
Dropout helps with cosine similarity, in both the latent and the concept space. <br>
Its effect is negative or unclear with Jaccard similarity, in both the latent and the concept space.

In [None]:
# Effect of dropout: each tuple is (label A, results A, label B, results B, row label).
dropoutComparisons = [
    ("Latent 2048 cxn    ", runLatent2048ndpResults,
     "Latent 2048 cxd    ", runLatent2048Results,
     "Lat. cos + dropout "),
    ("Concept 512 cxn-   ", runConcept512cxnnResults,
     "Concept 512 cxd-   ", runConcept512cxdnResults,
     "Con- cos + dropout "),
    ("Concept 512 cxn+   ", runConcept512cxiResults,
     "Concept 512 cxd+   ", runConcept512cxdResults,
     "Con+ cos + dropout "),
    ("Latent 2048 jxn    ", runLatent2048jndResults,
     "Latent 2048 jxd    ", runLatent2048jacResults,
     "Lat. jac + dropout "),
    ("Concept 512 jxn-   ", runConcept512jxnnResults,
     "Concept 512 jxd-   ", runConcept512jxdnResults,
     "Con- jac + dropout "),
    ("Concept 512 jxn+   ", runConcept512xiResults,
     "Concept 512 jxd+   ", runConcept512jxdResults,
     "Con+ jac + dropout "),
]
for comparisonIndex, comparisonArgs in enumerate(dropoutComparisons):
    if comparisonIndex > 0:
        print('')  # blank line between consecutive comparisons
    print(latexTableLineZ(*comparisonArgs))

# Xavier initialization
It always improves performance, most often significantly; this is possibly linked to the use of early stopping.

In [None]:
# Effect of Xavier initialization: (label A, results A, label B, results B, row label).
xavierComparisons = [
    ("Latent 2048 cnd    ", runLatent2048nxiResults,
     "Latent 2048 cxd    ", runLatent2048Results,
     "Lat. cos dp + xi   "),
    ("Latent 2048 jnn    ", runLatent2048jnnResults,
     "Latent 2048 jxn    ", runLatent2048jndResults,
     "Lat. jac ndp + xi  "),
    ("Concept 512 jnn+   ", runConcept512Results,
     "Concept 512 jxn+   ", runConcept512xiResults,
     "Con+ jac ndp + xi  "),
    ("Concept 512 cnn    ", runConcept512cosResults,
     "Concept 512 cxn    ", runConcept512cxiResults,
     "Con+ cos ndp + xi  "),
    ("Hybrid 1536 512*   ", runHybrid1536512Results,
     "Hybrid 1536 512 xi ", runHybrid1536512xiResults,
     "Hyb+ c.j dp.n + xi "),
    ("Hybrid 1536 512 co ", runHybrid1536512cosResults,
     "Hybrid 1536 512 cx ", runHybrid1536512cxResults,
     "Hyb+ c.c dp.n + xi "),
]
for comparisonIndex, comparisonArgs in enumerate(xavierComparisons):
    if comparisonIndex > 0:
        print('')  # blank line between consecutive comparisons
    print(latexTableLineZ(*comparisonArgs))

# Cosine (dot product of L2-normalized vectors) versus Jaccard (with sigmoid)
Jaccard significantly improves on latent with dropout. <br>
Jaccard significantly degrades on concept+ with dropout. <br>
Jaccard overall improves on latent / concept- without dropout but slightly degrades on Text to Video. <br>
Jaccard slightly degrades on concept+ without dropout

In [None]:
# Cosine vs. Jaccard similarity: (label A, results A, label B, results B, row label).
similarityComparisons = [
    ("Latent 2048 cxn    ", runLatent2048ndpResults,
     "Latent 2048 jxn    ", runLatent2048jndResults,
     "Lat. nd.xi cos->jac"),
    ("Concept 512 cxn-   ", runConcept512cxnnResults,
     "Concept 512 jxn-   ", runConcept512jxnnResults,
     "Con- nd.xi cos->jac"),
    ("Concept 512 cxd+   ", runConcept512cxdResults,
     "Concept 512 jxd+   ", runConcept512jxdResults,
     "Con+ dp.xi cos->jac"),
    ("Latent 2048 cxd    ", runLatent2048Results,
     "Latent 2048 jxd    ", runLatent2048jacResults,
     "Lat. dp.xi cos->jac"),
    ("Concept 512 cxd-   ", runConcept512cxdnResults,
     "Concept 512 jxd-   ", runConcept512jxdnResults,
     "Con- dp.xi cos->jac"),
    ("Concept 512 cxn+   ", runConcept512cxiResults,
     "Concept 512 jxn+   ", runConcept512xiResults,
     "Con+ nd.xi cos->jac"),
    ("Hybrid 1536 512    ", runHybrid1536512Results,
     "Hybrid 1536 512 co ", runHybrid1536512cosResults,
     "cos.jac -> cos.cos "),
    ("Hybrid 1536 512 xi ", runHybrid1536512xiResults,
     "Hybrid 1536 512 cx ", runHybrid1536512cxResults,
     "cos.jac -> cos.cos "),
    ("Hybrid 1536 512 xi ", runHybrid1536512xiResults,
     "Hybrid 1536 512 jxn", runHybrid1536512jxnResults,
     "cos.jac -> jac.jac "),
]
for comparisonIndex, comparisonArgs in enumerate(similarityComparisons):
    if comparisonIndex > 0:
        print('')  # blank line between consecutive comparisons
    print(latexTableLineZ(*comparisonArgs))

# Adding the classification constraint in concept or hybrid
Significant negative effect on concept with Jaccard similarity. <br>
Positive effect on concept with cosine similarity. <br>
Slight negative effect on hybrid.

In [None]:
# Effect of adding the classification constraint ("-" -> "+").
# In the row labels, "j" stands for Jaccard and "c" for cosine. The first two
# labels were swapped: the jxn pair carried "Con-c" and the cxn pair "Con-j".

# Concept space, Jaccard similarity, no dropout, Xavier init.
print(latexTableLineZ("Concept 512 jxn-   ", runConcept512jxnnResults,
                      "Concept 512 jxn+   ", runConcept512xiResults,
                      "Con-j.nd.xi -> Con+"))
print('')
# Concept space, cosine similarity, no dropout, Xavier init.
print(latexTableLineZ("Concept 512 cxn-   ", runConcept512cxnnResults,
                      "Concept 512 cxn+   ", runConcept512cxiResults,
                      "Con-c.nd.xi -> Con+"))
print('')
# Hybrid space, without vs. with classification.
print(latexTableLineZ("Hybrid 1536 512 xin", runHybrid1536512xinResults,
                      "Hybrid 1536 512 xi ", runHybrid1536512xiResults,
                      "Hyb-j.nd.xi -> Hyb+"))


# Influence of vocabulary size in the "latent" ("CXD") setting
Significant positive effect for 512->256 on Text to Video (TTV), no significant effect on Video to Text (VTT).

In [None]:
# Concept+ (cosine, Xavier, dropout) at 256 vs. 512 dimensions.
vocabularySizeComparison = (
    "Concept 256 cxd+   ", runConcept256cxdResults,
    "Concept 512 cxd+   ", runConcept512cxdResults,
    "Con+ c.x.d 256->512",
)
print(latexTableLineZ(*vocabularySizeComparison))

# PCA

In [None]:
# Same experiment ("hybrid_1536_512_xin"), read from two alternative log files.
_, runHybrid1536512xinsaveResults = readExpResultsSeq(
    expName="hybrid_1536_512_xin",
    eval_space="hybrid",
    log_file="logging_save.log",
)
_, runHybrid1536512xinp0p128Results = readExpResultsSeq(
    expName="hybrid_1536_512_xin",
    eval_space="hybrid",
    log_file="logging_pca_0_128.log",
)

# Compare the reference results with each alternative log.
pcaComparisons = [
    ("Hybrid 1536 512 xin     ", runHybrid1536512xinResults,
     "Hybrid 1536 512 xin save", runHybrid1536512xinsaveResults,
     "Hyb-j.nd.xi -> save     "),
    ("Hybrid 1536 512 xin     ", runHybrid1536512xinResults,
     "Hybrid 1536 512 xin p128", runHybrid1536512xinp0p128Results,
     "Hyb-j.nd.xi -> p128     "),
]
for comparisonIndex, comparisonArgs in enumerate(pcaComparisons):
    if comparisonIndex > 0:
        print("")  # blank line between consecutive comparisons
    print(latexTableLineZ(*comparisonArgs))

# 512-512 Hybrid study

In [None]:
# --- Load the 512-512 hybrid experiments, evaluated in the hybrid space ---
_, runHybrid512h512Results = readExpResultsSeq(expName="hybrid_512_512", eval_space="hybrid")
_, runHybrid512h512xiResults = readExpResultsSeq(expName="hybrid_512_512_xi", eval_space="hybrid")
_, runHybrid512h512noclxiResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi", eval_space="hybrid")
_, runHybrid512h512noclxidpResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi_dp", eval_space="hybrid")
_, runHybrid512h512noclxicosResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi_cos", eval_space="hybrid")
_, runHybrid512h512noclxicosdpResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi_cos_dp", eval_space="hybrid")

# Same experiment as above, read from the "logging_fuse.log" log file.
_, runHybrid512h512noclxicosdpfuseResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi_cos_dp",
                                                              eval_space="hybrid", log_file="logging_fuse.log")

_, runHybrid512h512noclxijacdpResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi_jac_dp", eval_space="hybrid")
_, runHybrid512h512noclxijacndpResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi_jac_ndp", eval_space="hybrid")

# Rows shared by the average and std tables (runHybrid1536512Results comes from an earlier cell).
hybrid512TableRows = [
    ("Hybrid 1536 512*   ", runHybrid1536512Results),
    ("Hybrid 512 512     ", runHybrid512h512Results),
    ("Hybrid 512 512 x   ", runHybrid512h512xiResults),
    ("Hybrid 512 512 x n ", runHybrid512h512noclxiResults),
    ("Hybrid 512 512 xdn ", runHybrid512h512noclxidpResults),
    ("Hybrid 512 512cx n ", runHybrid512h512noclxicosResults),
    ("Hybrid 512 512cxdn ", runHybrid512h512noclxicosdpResults),
    ("Hybrid 512 512cxdnf", runHybrid512h512noclxicosdpfuseResults),
    ("Hybrid 512 512jxdn ", runHybrid512h512noclxijacdpResults),
    ("Hybrid 512 512jxmn ", runHybrid512h512noclxijacndpResults),
]
for rowLabel, rowResults in hybrid512TableRows:
    print(latexAvgTableLine(rowLabel, rowResults))
print('')
for rowLabel, rowResults in hybrid512TableRows:
    print(latexStdTableLine(rowLabel, rowResults))

# Pairwise Z comparisons: (label A, results A, label B, results B, row label, conclusion).
hybrid512Comparisons = [
    ("Hybrid 1536 512    ", runHybrid1536512Results,
     "Hybrid 512 512     ", runHybrid512h512Results,
     "Hyb1536 -> Hyb512  ",
     'Decreasing latent space size to 512: marginally statistically significant decrease in performance.'),
    # NOTE(review): this row label is identical to the next comparison's although
    # the change studied here is Xavier initialization - confirm the intended label.
    ("Hybrid 512 512     ", runHybrid512h512Results,
     "Hybrid 512 512 x   ", runHybrid512h512xiResults,
     "Hyb+xi -> Hyb-     ",
     'Adding xavier initialization to concept: marginally statistically significant increase in performance.'),
    ("Hybrid 512 512 x n ", runHybrid512h512noclxiResults,
     "Hybrid 512 512 x   ", runHybrid512h512xiResults,
     "Hyb+xi -> Hyb-     ",
     'Adding classification: statistically significant (overall but not for TTV) yet moderate decrease in performance.'),
    # Fixed: the "x n" (no classification) side used to pass runHybrid512h512xiResults,
    # i.e. the run WITH classification, contradicting its label and the conclusion text.
    # NOTE(review): re-check the stated conclusion against the corrected comparison.
    ("Hybrid 512 512cx n ", runHybrid512h512noclxicosResults,
     "Hybrid 512 512 x n ", runHybrid512h512noclxiResults,
     "Hyb-xi-cc -> Hyb-cj",
     'Significant performance gain when changing from cos-cos to cos-jac (without classification)'),
    ("Hybrid 512 512 x n ", runHybrid512h512noclxiResults,
     "Hybrid 512 512 xdn ", runHybrid512h512noclxidpResults,
     "Hyb-co-xi-> Hyb-dp ",
     'No significant performance gain when adding dropout after jac mapping (cos-jac without classification)'),
    ("Hybrid 512 512cx n ", runHybrid512h512noclxicosResults,
     "Hybrid 512 512cxdn ", runHybrid512h512noclxicosdpResults,
     "Hyb-co-xi-> Hyb-dp ",
     'Significant performance gain when adding dropout after cos mapping (cos-cos without classification)'),
]
for labelA, resultsA, labelB, resultsB, rowLabel, conclusion in hybrid512Comparisons:
    print('')  # blank line before every comparison block
    print(latexTableLineZ(labelA, resultsA, labelB, resultsB, rowLabel))
    print(conclusion)

In [None]:
# --- Load latent-space evaluations: standalone latent models and hybrid-trained models ---
_, runLatent1536cxdResults = readExpResultsSeq(expName="latent_1536", eval_space="latent")
_, runLatent512cxdResults = readExpResultsSeq(expName="latent_512_cxd", eval_space="latent")
_, runLatent512jxdResults = readExpResultsSeq(expName="latent_512_jxd", eval_space="latent")
_, runLatent1536512Results = readExpResultsSeq(expName="hybrid_1536_512", eval_space="latent")
_, runLatent512h512Results = readExpResultsSeq(expName="hybrid_512_512", eval_space="latent")
_, runLatent512h512xiResults = readExpResultsSeq(expName="hybrid_512_512_xi", eval_space="latent")
_, runLatent512h512noclxiResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi", eval_space="latent")
_, runLatent512h512noclxidpResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi_dp", eval_space="latent")
_, runLatent512h512noclxicosResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi_cos", eval_space="latent")
_, runLatent512h512noclxicosdpResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi_cos_dp", eval_space="latent")
_, runLatent512h512noclxijacdpResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi_jac_dp", eval_space="latent")
_, runLatent512h512noclxijacndpResults = readExpResultsSeq(expName="hybrid_512_512_nocl_xi_jac_ndp", eval_space="latent")

# Only the variants used in the comparisons below are tabulated; the other
# loaded results stay available in the namespace.
latentTableRows = [
    ("Latent 1536c       ", runLatent1536cxdResults),
    ("Latent 512c        ", runLatent512cxdResults),
    ("Latent 512 512cxdn ", runLatent512h512noclxicosdpResults),
    ("Latent 512 512 x n ", runLatent512h512noclxiResults),
    ("Latent 512 512 x   ", runLatent512h512xiResults),
    ("Latent 512 512     ", runLatent512h512Results),
]
for rowLabel, rowResults in latentTableRows:
    print(latexAvgTableLine(rowLabel, rowResults))

# Pairwise Z comparisons: (label A, results A, label B, results B, row label, conclusion).
latentComparisons = [
    ("Latent 1536 indep. ", runLatent1536cxdResults,
     "Latent 512 indep.  ", runLatent512cxdResults,
     "Lat1536 -> Lat512  ",
     'Decreasing latent space size to 512: no statistically significant difference in performance.'),
    ("Latent 512 indep.  ", runLatent512cxdResults,
     "Latent 512 coupled ", runLatent512h512noclxicosdpResults,
     "Indep. -> coupled  ",
     'Independent training -> coupled training: significant increase in performance.'),
    ("Latent 512 coupled ", runLatent512h512noclxicosdpResults,
     "Latent 512 co. het.", runLatent512h512noclxiResults,
     "coupled -> +het.   ",
     'Coupled homogeneous training -> heterogeneous: significant increase in performance.'),
    # Fixed: the next two conclusions used to repeat the "Coupled homogeneous
    # training -> heterogeneous" prefix of the previous comparison (copy-paste),
    # although they study adding classification and removing Xavier init.
    ("Latent 512 het.    ", runLatent512h512noclxiResults,
     "Latent 512 het.cla.", runLatent512h512xiResults,
     "Latent het.-> +cla.",
     'Heterogeneous training -> adding classification: no statistically significant difference in performance.'),
    ("Latent 512 het.cla.", runLatent512h512xiResults,
     "Latent 512 no Xav. ", runLatent512h512Results,
     "Remove Xav. init.  ",
     'Removing Xavier initialization: marginally statistically significant decrease in performance.'),
]
for labelA, resultsA, labelB, resultsB, rowLabel, conclusion in latentComparisons:
    print('')  # blank line before every comparison block
    print(latexTableLineZ(labelA, resultsA, labelB, resultsB, rowLabel))
    print(conclusion)