In [1]:
# Load IPython's autoreload extension and set it to mode 2, so imported modules
# are reloaded before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Add the parent directory to the import search path.
# NOTE(review): presumably needed so the `utils` package imported in the next
# cell resolves; this is relative to the kernel's working directory.
sys.path.append('..')

In [3]:
from utils.intermediate_layer_extraction import return_layer_input
from utils.ri_calculator import computeRi,computeRiNaif
import deepmatcher as dm
import torch
from tqdm import tqdm
import pandas as pd

In [4]:
# Build a deepmatcher MatchingModel using the 'hybrid' attribute summarizer and
# restore its saved state from the iTunes-Amazon checkpoint file.
hybrid_model = dm.MatchingModel(attr_summarizer='hybrid')
hybrid_model.load_state('../models/itunes_amazon_hybrid.pth')

In [5]:
# Run return_layer_input on the iTunes-Amazon dataset for the 'negatives' split,
# passing the model and its classifier module. The two returned values are kept
# as `negatives` (iterated batch by batch in later cells) and `neg_ids`.
# NOTE(review): 128 looks like a batch size and device=-1 like "run on CPU" --
# confirm against return_layer_input.
negatives,neg_ids = return_layer_input('../Structured/itunes-amazon','negatives',128,
                                      hybrid_model,hybrid_model.classifier,device=-1)

In [6]:
# Run return_layer_input on the iTunes-Amazon dataset for the 'positives' split,
# passing the model and its classifier module. The two returned values are kept
# as `positives` (iterated batch by batch in later cells) and `pos_ids`.
# NOTE(review): 128 looks like a batch size and device=-1 like "run on CPU" --
# confirm against return_layer_input.
positives,pos_ids = return_layer_input('../Structured/itunes-amazon','positives',128,
                                      hybrid_model,hybrid_model.classifier,device=-1)

In [7]:
# Move the model to the GPU for the cells below.
# NOTE(review): the layer inputs above were extracted with device=-1; confirm
# that the tensors later fed to hybrid_model.classifier live on the same device.
hybrid_model = hybrid_model.cuda()

In [None]:
# For every negative sample, call findCloserV2 once per attribute index and
# collect the eight results as one row of `closest_distance_neg`.
# NOTE(review): findCloserV2 is not imported in any visible cell, so this cell
# raises NameError on a fresh kernel -- import it from its defining module.
# NOTE(review): 150 is presumably the per-attribute vector length (the same
# stride is used to slice samples in the test cell) and 1 the target class --
# confirm against findCloserV2.
attribute_indexes = list(range(8))
closest_distance_neg = []
for batch in negatives:
    for sample in tqdm(batch):
        distances_onAttribute = [
            findCloserV2(sample, positives, 1, hybrid_model.classifier, idx, 150)
            for idx in attribute_indexes
        ]
        closest_distance_neg.append(distances_onAttribute)

## Test of the algorithm (negative samples)

In [None]:
# Sanity check: for each negative sample and each of the 8 attribute slots,
# overwrite the slot's 150-wide slice with the stored `closer` tensor and assert
# that the classifier then scores index 1 above index 0.
# Entries whose norm is not > 0 are skipped.
# NOTE(review): a zero norm presumably marks "no counterpart found" -- confirm.
j = 0  # running index into closest_distance_neg, one row per sample
for batch in negatives:
    for sample in batch:
        closest_per_attribute = closest_distance_neg[j]
        for i in range(8):
            closer = closest_per_attribute[i]
            distance = torch.norm(closer).data[0]
            start_idx, end_idx = i * 150, (i + 1) * 150
            if not distance > 0:
                continue
            # Work on a cloned copy with a leading batch dimension so the
            # original sample is left untouched.
            sample_batch = sample.clone().unsqueeze(0)
            sample_batch[:, start_idx:end_idx] = closer
            prediction = hybrid_model.classifier.forward(sample_batch)
            scores = prediction.data[0]
            assert scores[1] > scores[0], "All predictions must be positive"
        j += 1

## Positive analysis

In [None]:
# For every positive sample, call findCloserV2 once per attribute index and
# collect the results as one row of `closest_distance_pos`.
# Relies on `attribute_indexes` defined in the negatives cell above.
# NOTE(review): findCloserV2 is not imported in any visible cell, so this cell
# raises NameError on a fresh kernel -- import it from its defining module.
closest_distance_pos = []
for batch in positives:
    for sample in tqdm(batch):
        distances_onAttribute = [
            findCloserV2(sample, negatives, 0, hybrid_model.classifier, idx, 150)
            for idx in attribute_indexes
        ]
        closest_distance_pos.append(distances_onAttribute)

In [None]:
# Sanity check: for each positive sample and each of the 8 attribute slots,
# overwrite the slot's 150-wide slice with the stored `closer` tensor and assert
# that the classifier then scores index 0 above index 1.
# Entries whose norm is not > 0 are skipped.
# NOTE(review): a zero norm presumably marks "no counterpart found" -- confirm.
j = 0  # running index into closest_distance_pos, one row per sample
for batch in positives:
    for sample in batch:
        closest_per_attribute = closest_distance_pos[j]
        for i in range(8):
            closer = closest_per_attribute[i]
            distance = torch.norm(closer).data[0]
            start_idx, end_idx = i * 150, (i + 1) * 150
            if not distance > 0:
                continue
            # Work on a cloned copy with a leading batch dimension so the
            # original sample is left untouched.
            sample_batch = sample.clone().unsqueeze(0)
            sample_batch[:, start_idx:end_idx] = closer
            prediction = hybrid_model.classifier.forward(sample_batch)
            scores = prediction.data[0]
            assert scores[0] > scores[1], "All predictions must be negatives"
        j += 1

## Compare naive approach with gradient approach

### Negatives

In [15]:
def compareMethods(df1,df2,columns):
    """Count, cell by cell, which of two frames holds the smaller positive value.

    Every row label of ``df1`` is looked up in both frames for each name in
    ``columns``. A cell is credited to a frame when its value is positive and
    either strictly smaller than the other frame's value or the other frame's
    value is exactly 0. Cells where the values are equal, or where neither
    condition holds (e.g. both are 0), are credited to nobody.

    Args:
        df1: first DataFrame; its index drives the iteration.
        df2: second DataFrame; must contain the same row labels and columns.
        columns: iterable of column labels to compare.

    Returns:
        Tuple ``(best1, best2)``: the number of cells credited to ``df1`` and
        to ``df2`` respectively.
    """
    tally = [0, 0]  # [cells credited to df1, cells credited to df2]
    for row_label in df1.index:
        for column in columns:
            first = df1.at[row_label, column]
            second = df2.at[row_label, column]
            if (first == 0 and second > 0) or (0 < second < first):
                tally[1] += 1
            elif (second == 0 and first > 0) or (0 < first < second):
                tally[0] += 1
    return tuple(tally)

In [8]:
# Attribute names, passed to computeRi / computeRiNaif and reused as the column
# labels of the result DataFrames below. There are eight entries, matching the
# eight attribute slots iterated in the earlier cells.
attributes = ['Song_Name','Artist_Name','Album_Name','Genre','Price','CopyRight','Time','Released']

In [10]:
# Run computeRi on the negative samples; the second returned value is wrapped
# below into the `neg_variation_gradient` frame.
# NOTE(review): this is presumably the gradient-based approach, with 150 as the
# per-attribute vector length and 1 as the target class -- confirm against
# utils.ri_calculator.
neg_ri,neg_ri_norms = computeRi(hybrid_model.classifier,attributes,negatives,150,1)

100%|██████████| 128/128 [00:19<00:00,  6.56it/s]
100%|██████████| 128/128 [00:20<00:00,  6.28it/s]
100%|██████████| 23/23 [00:03<00:00,  7.23it/s]
100%|██████████| 128/128 [00:20<00:00,  6.24it/s]


In [12]:
# Run computeRiNaif on the negative samples, with the positives as the second
# argument; the second returned value is wrapped below into the
# `neg_variation_naif` frame.
# NOTE(review): presumably the positives are the pool searched for counterparts,
# 1 is the target class and 150 the per-attribute vector length -- confirm
# against utils.ri_calculator.
neg_ri_naif,neg_ri_norms_naif = computeRiNaif(negatives,positives,1,hybrid_model.classifier,attributes,150)

100%|██████████| 128/128 [00:10<00:00, 11.95it/s]
100%|██████████| 128/128 [00:10<00:00, 12.61it/s]
100%|██████████| 23/23 [00:02<00:00, 11.41it/s]
100%|██████████| 128/128 [00:10<00:00, 12.66it/s]


In [13]:
# Wrap the norms returned by each method into DataFrames with one column per
# attribute, so the two methods can be compared cell by cell.
neg_variation_gradient = pd.DataFrame(neg_ri_norms, columns=attributes)
neg_variation_naif = pd.DataFrame(neg_ri_norms_naif, columns=attributes)

In [16]:
# Returns (cells where the gradient frame holds the smaller positive norm,
#          cells where the naive frame does); ties and all-zero cells count for neither.
compareMethods(neg_variation_gradient,neg_variation_naif,attributes)

(2705, 351)

### Positives

In [31]:
# Run computeRi on the positive samples; the second returned value is wrapped
# below into the `pos_perturbations_gradient` frame.
# NOTE(review): this is presumably the gradient-based approach, with 150 as the
# per-attribute vector length and 0 as the target class -- confirm against
# utils.ri_calculator.
pos_ri,pos_ri_norms = computeRi(hybrid_model.classifier,attributes,positives,150,0)

100%|██████████| 4/4 [00:01<00:00,  3.66it/s]
100%|██████████| 128/128 [00:26<00:00,  4.86it/s]


In [19]:
# Run computeRiNaif on the positive samples, with the negatives as the second
# argument; the second returned value is wrapped below into the
# `pos_perturbations_naif` frame.
# NOTE(review): presumably the negatives are the pool searched for counterparts,
# 0 is the target class and 150 the per-attribute vector length -- confirm
# against utils.ri_calculator.
pos_ri_naif,pos_ri_norms_naif = computeRiNaif(positives,negatives,0,hybrid_model.classifier,attributes,150)

100%|██████████| 4/4 [00:00<00:00,  4.37it/s]
100%|██████████| 128/128 [00:28<00:00,  4.47it/s]


In [32]:
# Wrap the norms returned by each method into DataFrames with one column per
# attribute, so the two methods can be compared cell by cell.
pos_perturbations_gradient = pd.DataFrame(pos_ri_norms, columns=attributes)
pos_perturbations_naif = pd.DataFrame(pos_ri_norms_naif, columns=attributes)

In [34]:
# Returns (cells where the gradient frame holds the smaller positive norm,
#          cells where the naive frame does); ties and all-zero cells count for neither.
compareMethods(pos_perturbations_gradient,pos_perturbations_naif,attributes)

(909, 51)