In [1]:
import pandas as pd
import deepmatcher as dm

In [2]:
train = pd.read_csv('sample_data/itunes-amazon/train.csv')
validation = pd.read_csv('sample_data/itunes-amazon/validation.csv')
test = pd.read_csv('sample_data/itunes-amazon/test.csv')

In [9]:
train_positives = train[train['label']==1]
train_negatives = train[train['label']==0]

In [10]:
def get_price_distribution(df):
    prices = {}
    for idx,row in df.iterrows():
        current_prices = row['left_Price']+ " "+ row['right_Price']
        if current_prices in prices:
            prices[current_prices] += 1
        else:
            prices[current_prices] = 1
    return prices

In [6]:
price_negatives = get_price_distribution(train_negatives)
price_negatives

{'$ 1.29 $ 1.29': 153,
 'Album Only $ 1.29': 20,
 '$ 1.29 $ 0.99': 18,
 '$ 1.99 $ 1.29': 25,
 '$ 1.29 $ 0.69': 3,
 '$ 0.99 $ 0.99': 11,
 '$ 1.99 $ 0.99': 3,
 '$ 0.99 $ 1.29': 11,
 '$ 0.99 $ 0.89': 2}

In [7]:
price_positives = get_price_distribution(train_positives)
price_positives

{'$ 0.99 $ 0.99': 17,
 '$ 1.29 $ 1.29': 58,
 '$ 1.99 $ 1.29': 1,
 '$ 1.29 $ 0.89': 1}

### Evaluation changing prices

In [8]:
hybrid_model = dm.MatchingModel(attr_summarizer='hybrid')

In [9]:
hybrid_model.load_state('models/hybrid1.pth')
hybrid_model.cuda()

MatchingModel(
  (attr_summarizers): ModuleMap(
    (Song_Name): Hybrid(
      (word_contextualizer): RNN(
        (rnn_groups): ModuleList(
          (0): GRU(300, 150, batch_first=True, bidirectional=True)
        )
        (dropouts): ModuleList(
          (0): Dropout(p=0)
        )
        (bypass_networks): ModuleList(
          (0): None
        )
        (input_dropout): NoMeta(
          (module): Dropout(p=0)
        )
      )
      (word_comparator): Attention(
        (alignment_networks): ModuleList(
          (0): AlignmentNetwork(
            (transform): Transform(
              (transforms): ModuleList(
                (0): Linear(in_features=300, out_features=300, bias=True)
                (1): Linear(in_features=300, out_features=300, bias=True)
              )
              (bypass_networks): ModuleList(
                (0): Bypass(
                  (highway_gate): Linear(in_features=300, out_features=300, bias=True)
                )
                (1): Bypass(


### Evaluate model on standard test set

In [10]:
test_pos = dm.data.process_unlabeled('sample_data/itunes-amazon/test_positives.csv',trained_model=hybrid_model,
                                ignore_columns = ['id','label'])

In [11]:
standard_pred = hybrid_model.run_prediction(test_pos,output_attributes=True)

===>  PREDICT Epoch 7
Finished Epoch 7 || Run Time:    0.4 | Load Time:    0.1 || F1:   0.00 | Prec:   0.00 | Rec:   0.00 || Ex/s:   0.00



In [12]:
standard_pred_neg = standard_pred[standard_pred['match_score']<=0.5]
standard_pred_neg.shape

(5, 18)

### Evaluate model on altered test set (on price)

In [13]:
test_positives = pd.read_csv('sample_data/itunes-amazon/test_positives.csv')

In [5]:
def alter_price(df,left_pr,right_pr):
    df['left_Price'] = left_pr
    df['right_Price'] = right_pr
    return df

In [16]:
test_pos_altered = alter_price(test_positives,'Album Only','$ 1.29')

In [17]:
test_pos_altered.to_csv('sample_data/itunes-amazon/exp6/test_positives_altered_price.csv',index=False)

In [18]:
test_pos_altered_price = dm.data.process_unlabeled('sample_data/itunes-amazon/exp6/test_positives_altered_price.csv'
                                                   ,trained_model=hybrid_model,ignore_columns=['id','label'])

In [19]:
altered_pred = hybrid_model.run_prediction(test_pos_altered_price,output_attributes=True)

===>  PREDICT Epoch 7
Finished Epoch 7 || Run Time:    0.1 | Load Time:    0.1 || F1:   0.00 | Prec:   0.00 | Rec:   0.00 || Ex/s:   0.00



In [20]:
altered_pred_neg = altered_pred[altered_pred['match_score'] <=0.5]
altered_pred_neg.shape

(8, 18)

### Evaluate closer vectors on classifier space

In [32]:
all_neg = pd.read_csv('sample_data/itunes-amazon/exp3/negative_samples.csv')

In [41]:
closer_negatives = pd.read_csv('experiments-results/exp3/positives_closer_vectors_on_attribute.csv',sep=';')
closer_negatives.tail()

Unnamed: 0,Song_Name,Artist_Name,Album_Name,Genre,Price,CopyRight,Time,Released,Pos_sample_ID
127,206,422,83,422,281,297,472,199,502
128,438,418,491,398,281,420,290,397,250
129,426,402,341,401,281,199,472,290,313
130,180,418,492,182,281,420,256,397,276
131,244,418,491,421,281,420,233,397,503


In [38]:
join_df =pd.merge(left=all_neg,right=closer_negatives,left_on='id',right_on='Price')

In [45]:
interested_columns = join_df.loc[:, ['id','left_Price','right_Price']]

Unnamed: 0,id,left_Price,right_Price
0,243,$ 0.99,$ 1.29
1,328,$ 1.99,$ 0.99
2,328,$ 1.99,$ 0.99
3,328,$ 1.99,$ 0.99
4,281,Album Only,$ 1.29
5,281,Album Only,$ 1.29
6,281,Album Only,$ 1.29
7,281,Album Only,$ 1.29
8,281,Album Only,$ 1.29
9,281,Album Only,$ 1.29


In [46]:
interested_columns.to_csv('experiments-results/exp6/closer_negatives_prices.csv',index=False)

## Create new training set with different price distribution

In [21]:
for idx,row in train.iterrows():
    if row['label']==1:
        if idx % 2 ==0:
            train.at[idx,'left_Price'] = '$ 0.99'
            train.at[idx,'right_Price'] = '$ 0.99'
        else:
            train.at[idx,'left_Price'] = '$ 1.29'
            train.at[idx,'right_Price'] = '$ 1.29'
    else:
        if idx % 2 == 0:
            train.at[idx,'left_Price'] = 'Album Only'
            train.at[idx,'right_Price'] = '$ 1.29'
        else:
            train.at[idx,'left_Price'] = '$ 1.29'
            train.at[idx,'right_Price'] = '$ 1.29'

In [22]:
for idx,row in validation.iterrows():
    if row['label']==1:
        if idx % 2 ==0:
            validation.at[idx,'left_Price'] = '$ 0.99'
            validation.at[idx,'right_Price'] = '$ 0.99'
        else:
            validation.at[idx,'left_Price'] = '$ 1.29'
            validation.at[idx,'right_Price'] = '$ 1.29'
    else:
        if idx % 2 == 0:
            validation.at[idx,'left_Price'] = 'Album Only'
            validation.at[idx,'right_Price'] = '$ 1.29'
        else:
            validation.at[idx,'left_Price'] = '$ 1.29'
            validation.at[idx,'right_Price'] = '$ 1.29'

In [23]:
train.to_csv('sample_data/itunes-amazon/exp6/train_newprices.csv',index=False)

In [24]:
validation.to_csv('sample_data/itunes-amazon/exp6/validation_newprices.csv',index=False)

In [25]:
train,valid,test = dm.data.process('sample_data/itunes-amazon/exp6',train='train_newprices.csv',
                                   validation='validation_newprices.csv',test='test.csv',)



In [26]:
hybrid_model = dm.MatchingModel(attr_summarizer='hybrid')

In [27]:
hybrid_model.run_train(train,valid,best_save_path='models/hybrid_pricebias.pth',pos_neg_ratio=4,batch_size=16)

* Number of trainable parameters: 17757810
===>  TRAIN Epoch 1
Finished Epoch 1 || Run Time:    7.8 | Load Time:    1.2 || F1:  43.01 | Prec:  29.70 | Rec:  77.92 || Ex/s:  36.00

===>  EVAL Epoch 1
Finished Epoch 1 || Run Time:    1.0 | Load Time:    0.4 || F1:  52.17 | Prec:  35.29 | Rec: 100.00 || Ex/s:  78.33

* Best F1: 52.17391304347826
Saving best model...
Done.
---------------------

===>  TRAIN Epoch 2
Finished Epoch 2 || Run Time:    7.7 | Load Time:    1.2 || F1:  74.74 | Prec:  62.83 | Rec:  92.21 || Ex/s:  36.04

===>  EVAL Epoch 2
Finished Epoch 2 || Run Time:    1.1 | Load Time:    0.4 || F1:  77.19 | Prec:  66.67 | Rec:  91.67 || Ex/s:  73.15

* Best F1: 77.19298245614036
Saving best model...
Done.
---------------------

===>  TRAIN Epoch 3
Finished Epoch 3 || Run Time:    7.7 | Load Time:    1.2 || F1:  85.06 | Prec:  76.29 | Rec:  96.10 || Ex/s:  36.19

===>  EVAL Epoch 3
Finished Epoch 3 || Run Time:    0.9 | Load Time:    0.4 || F1:  77.97 | Prec:  65.71 | Rec:  95.

Finished Epoch 27 || Run Time:    0.9 | Load Time:    0.4 || F1:  90.20 | Prec:  85.19 | Rec:  95.83 || Ex/s:  84.62

---------------------

===>  TRAIN Epoch 28
Finished Epoch 28 || Run Time:    7.4 | Load Time:    1.2 || F1: 100.00 | Prec: 100.00 | Rec: 100.00 || Ex/s:  37.49

===>  EVAL Epoch 28
Finished Epoch 28 || Run Time:    0.9 | Load Time:    0.4 || F1:  90.20 | Prec:  85.19 | Rec:  95.83 || Ex/s:  81.81

---------------------

===>  TRAIN Epoch 29
Finished Epoch 29 || Run Time:    7.4 | Load Time:    1.2 || F1: 100.00 | Prec: 100.00 | Rec: 100.00 || Ex/s:  37.61

===>  EVAL Epoch 29
Finished Epoch 29 || Run Time:    0.9 | Load Time:    0.4 || F1:  90.20 | Prec:  85.19 | Rec:  95.83 || Ex/s:  88.04

---------------------

===>  TRAIN Epoch 30
Finished Epoch 30 || Run Time:    7.0 | Load Time:    1.2 || F1: 100.00 | Prec: 100.00 | Rec: 100.00 || Ex/s:  39.25

===>  EVAL Epoch 30
Finished Epoch 30 || Run Time:    0.9 | Load Time:    0.4 || F1:  90.20 | Prec:  85.19 | Rec:  95.83

95.83333333333333

In [28]:
hybrid_model.run_eval(test)

===>  EVAL Epoch 5
Finished Epoch 5 || Run Time:    0.6 | Load Time:    0.4 || F1:  75.76 | Prec:  71.43 | Rec:  80.65 || Ex/s: 102.66



75.75757575757575

In [32]:
test_pos_altered_price = dm.data.process_unlabeled('sample_data/itunes-amazon/exp6/test_positives_altered_price.csv'
                                                   ,trained_model=hybrid_model,ignore_columns=['id','label'])

In [33]:
pred_new_model = hybrid_model.run_prediction(test_pos_altered_price,output_attributes=True)

===>  PREDICT Epoch 5
Finished Epoch 5 || Run Time:    0.1 | Load Time:    0.1 || F1:   0.00 | Prec:   0.00 | Rec:   0.00 || Ex/s:   0.00



In [34]:
pred_neg_new_model = pred_new_model[pred_new_model['match_score']<=0.5]
pred_neg_new_model.shape

(13, 18)

In [35]:
test_pos = dm.data.process_unlabeled('sample_data/itunes-amazon/test_positives.csv',
                                    trained_model=hybrid_model,ignore_columns=['id','label'])

In [40]:
pred_standardtest_newmodel = hybrid_model.run_prediction(test_pos,output_attributes=True)

===>  PREDICT Epoch 5
Finished Epoch 5 || Run Time:    0.2 | Load Time:    0.2 || F1:   0.00 | Prec:   0.00 | Rec:   0.00 || Ex/s:   0.00



In [43]:
pred_standardtest_newmodel_negative = pred_standardtest_newmodel[pred_standardtest_newmodel['match_score']<=0.5]
pred_neg_new_model.shape

(13, 18)

## Try to alter price in test set,

In [89]:
test_pos = pd.read_csv('sample_data/itunes-amazon/test_positives.csv')

In [90]:
alter_price(test_pos,'$ 0.99','$ 0.99')
test_pos.to_csv('sample_data/itunes-amazon/exp6/test_pos_099price.csv',index=False)

In [91]:
test_pos.shape

(31, 18)

In [92]:
test_pos_099price = dm.data.process_unlabeled('sample_data/itunes-amazon/exp6/test_pos_099price.csv'
                                                   ,trained_model=hybrid_model,ignore_columns=['id','label'])

In [93]:
preds = hybrid_model.run_prediction(test_pos_099price,output_attributes=True)
negative_preds = preds[preds['match_score']<=0.5]

===>  PREDICT Epoch 5
Finished Epoch 5 || Run Time:    0.1 | Load Time:    0.1 || F1:   0.00 | Prec:   0.00 | Rec:   0.00 || Ex/s:   0.00



In [95]:
negative_preds.shape

(0, 18)

## Create new dataset with balanced price

In [6]:
def set_balanced_price(df):
    for idx,row in df.iterrows():
        if idx%2 ==0 :
            df.at[idx,'left_Price'] = '$ 0.99'
            df.at[idx,'right_Price'] ='$ 0.99'
        else:
            df.at[idx,'left_Price'] = '$ 1.29'
            df.at[idx,'right_Price'] ='$ 1.29'

In [7]:
set_balanced_price(train)
set_balanced_price(test)
set_balanced_price(validation)

In [13]:
get_price_distribution(train_positives)

{'$ 1.29 $ 1.29': 41, '$ 0.99 $ 0.99': 36}

In [12]:
get_price_distribution(train_negatives)

{'$ 0.99 $ 0.99': 126, '$ 1.29 $ 1.29': 120}

In [14]:
train.to_csv('sample_data/itunes-amazon/exp6/train_balanced.csv',index=False)
validation.to_csv('sample_data/itunes-amazon/exp6/validation_balanced.csv',index=False)
test.to_csv('sample_data/itunes-amazon/exp6/test_balanced.csv',index=False)

In [16]:
train_negatives = train[train['label']==0]
validation_negatives = validation[validation['label']==0]
test_negatives = test[test['label']==0]

In [17]:
train_positives = train[train['label']==1]
validation_positives = validation[validation['label']==1]
test_positives = test[test['label']==1]

In [18]:
negatives_df = train_negatives.append(validation_negatives,ignore_index=True)
negatives_df = negatives_df.append(test_negatives,ignore_index=True)
positives_df = train_positives.append(validation_positives,ignore_index=True)
positives_df = positives_df.append(test_positives,ignore_index=True)

In [19]:
negatives_df.to_csv('sample_data/itunes-amazon/exp6/negatives_balanced.csv',index=False)
positives_df.to_csv('sample_data/itunes-amazon/exp6/positives_balanced.csv',index=False)

In [20]:
get_price_distribution(negatives_df)

{'$ 0.99 $ 0.99': 200, '$ 1.29 $ 1.29': 207}

In [21]:
get_price_distribution(positives_df)

{'$ 1.29 $ 1.29': 62, '$ 0.99 $ 0.99': 70}

### Train model with balanced dataset

In [27]:
train,validation,test = dm.data.process('sample_data/itunes-amazon/exp6',train='train_balanced.csv',validation='validation_balanced.csv',
                                       test='test_balanced.csv')

In [28]:
model = dm.MatchingModel(attr_summarizer='hybrid')

In [29]:
model.run_train(train,validation,best_save_path='models/hybrid_balanced_data.pth',epochs=20,batch_size=16,
               pos_neg_ratio=4)

* Number of trainable parameters: 17757810
===>  TRAIN Epoch 1
Finished Epoch 1 || Run Time:    7.6 | Load Time:    1.2 || F1:  52.36 | Prec:  39.10 | Rec:  79.22 || Ex/s:  36.55

===>  EVAL Epoch 1
Finished Epoch 1 || Run Time:    1.0 | Load Time:    0.4 || F1:  54.55 | Prec:  39.62 | Rec:  87.50 || Ex/s:  82.10

* Best F1: 54.54545454545455
Saving best model...
Done.
---------------------

===>  TRAIN Epoch 2
Finished Epoch 2 || Run Time:    7.2 | Load Time:    1.1 || F1:  71.50 | Prec:  59.48 | Rec:  89.61 || Ex/s:  38.95

===>  EVAL Epoch 2
Finished Epoch 2 || Run Time:    0.9 | Load Time:    0.3 || F1:  70.59 | Prec:  66.67 | Rec:  75.00 || Ex/s:  89.84

* Best F1: 70.58823529411764
Saving best model...
Done.
---------------------

===>  TRAIN Epoch 3
Finished Epoch 3 || Run Time:    7.6 | Load Time:    1.2 || F1:  86.21 | Prec:  77.32 | Rec:  97.40 || Ex/s:  36.61

===>  EVAL Epoch 3
Finished Epoch 3 || Run Time:    0.8 | Load Time:    0.3 || F1:  82.35 | Prec:  77.78 | Rec:  87.

88.46153846153845

In [30]:
model.run_eval(test)

===>  EVAL Epoch 4
Finished Epoch 4 || Run Time:    0.5 | Load Time:    0.4 || F1:  82.54 | Prec:  81.25 | Rec:  83.87 || Ex/s: 117.79



82.53968253968254

### Calculate Ri with new model

In [49]:
from utilities_functions.intermediate_layer_extraction import Hook,return_layer_input_output
from deepmatcher.data import MatchingIterator
import torch

In [35]:
attributes = ['Song_Name','Artist_Name','Album_Name','Genre','Price','CopyRight','Time','Released']
negative_dataset = dm.data.process(path='sample_data/itunes-amazon/exp6',train='negatives_balanced.csv',
                            validation='validation_balanced.csv',cache='ncache.pth')

In [36]:
classifier = model.classifier
hookF_classifier = []
hookF_classifier.append(Hook(classifier))

In [39]:
batch_size = 32
splits = MatchingIterator.splits(negative_dataset,batch_size=batch_size)
negative_batches = []
for batch in splits[0]:
    negative_batches.append(batch)

In [40]:
negative_classifier_inputs = []
negative_classifier_outputs = []
for batch in negative_batches:
    classifier_input,classifier_output = return_layer_input_output(hookF_classifier,batch,model)
    negative_classifier_inputs.append(classifier_input)
    negative_classifier_outputs.append(classifier_output)

In [41]:
negative_classifier_inputs = list(map(lambda x: x[0][0],negative_classifier_inputs))
negative_classifier_outputs = list(map(lambda x: x[0][0],negative_classifier_outputs))

In [42]:
positive_dataset= dm.data.process(path='sample_data/itunes-amazon/exp6',train='positives_balanced.csv',
                            validation='validation_balanced.csv',cache='pcache.pth')

In [43]:
batch_size = 32
splits = MatchingIterator.splits(positive_dataset,batch_size=batch_size)
positive_batches = []
for batch in splits[0]:
    positive_batches.append(batch)

In [44]:
positive_classifier_inputs = []
positive_classifier_outputs = []
for batch in positive_batches:
    classifier_input,classifier_output = return_layer_input_output(hookF_classifier,batch,model)
    positive_classifier_inputs.append(classifier_input)
    positive_classifier_outputs.append(classifier_output)

In [45]:
positive_classifier_inputs = list(map(lambda x: x[0][0],positive_classifier_inputs))
positive_classifier_outputs = list(map(lambda x: x[0][0],positive_classifier_outputs))

In [46]:
from utilities_functions.distance_measures import find_smallest_variation_to_change

In [47]:
attribute_lenght= len(attributes)

current_sample = 0
#each column of this matrix is related to a specific attribute
negatives_ri_matrix = []
for batch in negative_classifier_inputs:
    for index in range(len(batch)):
        current_sample_ris = []
        for j,attribute in enumerate(attributes):
            print('Processing sample {} with attribute {}'.format(current_sample,attribute))
            it,variation = find_smallest_variation_to_change(model.classifier,batch,index,j,1)
            current_sample_ris.append(variation)
        negatives_ri_matrix.append(current_sample_ris)
        current_sample+=1

Processing sample 0 with attribute Song_Name
Processing sample 0 with attribute Artist_Name
Processing sample 0 with attribute Album_Name
Processing sample 0 with attribute Genre
Processing sample 0 with attribute Price
Processing sample 0 with attribute CopyRight
Processing sample 0 with attribute Time
Processing sample 0 with attribute Released
Processing sample 1 with attribute Song_Name
Processing sample 1 with attribute Artist_Name
Processing sample 1 with attribute Album_Name
Processing sample 1 with attribute Genre
Processing sample 1 with attribute Price
Processing sample 1 with attribute CopyRight
Processing sample 1 with attribute Time
Processing sample 1 with attribute Released
Processing sample 2 with attribute Song_Name
Processing sample 2 with attribute Artist_Name
Processing sample 2 with attribute Album_Name
Processing sample 2 with attribute Genre
Processing sample 2 with attribute Price
Processing sample 2 with attribute CopyRight
Processing sample 2 with attribute Ti

Processing sample 23 with attribute Album_Name
Processing sample 23 with attribute Genre
Processing sample 23 with attribute Price
Processing sample 23 with attribute CopyRight
Processing sample 23 with attribute Time
Processing sample 23 with attribute Released
Processing sample 24 with attribute Song_Name
Processing sample 24 with attribute Artist_Name
Processing sample 24 with attribute Album_Name
Processing sample 24 with attribute Genre
Processing sample 24 with attribute Price
Processing sample 24 with attribute CopyRight
Processing sample 24 with attribute Time
Processing sample 24 with attribute Released
Processing sample 25 with attribute Song_Name
Processing sample 25 with attribute Artist_Name
Processing sample 25 with attribute Album_Name
Processing sample 25 with attribute Genre
Processing sample 25 with attribute Price
Processing sample 25 with attribute CopyRight
Processing sample 25 with attribute Time
Processing sample 25 with attribute Released
Processing sample 26 wi

Processing sample 46 with attribute Album_Name
Processing sample 46 with attribute Genre
Processing sample 46 with attribute Price
Processing sample 46 with attribute CopyRight
Processing sample 46 with attribute Time
Processing sample 46 with attribute Released
Processing sample 47 with attribute Song_Name
Processing sample 47 with attribute Artist_Name
Processing sample 47 with attribute Album_Name
Processing sample 47 with attribute Genre
Processing sample 47 with attribute Price
Processing sample 47 with attribute CopyRight
Processing sample 47 with attribute Time
Processing sample 47 with attribute Released
Processing sample 48 with attribute Song_Name
Processing sample 48 with attribute Artist_Name
Processing sample 48 with attribute Album_Name
Processing sample 48 with attribute Genre
Processing sample 48 with attribute Price
Processing sample 48 with attribute CopyRight
Processing sample 48 with attribute Time
Processing sample 48 with attribute Released
Processing sample 49 wi

Processing sample 69 with attribute Album_Name
Processing sample 69 with attribute Genre
Processing sample 69 with attribute Price
Processing sample 69 with attribute CopyRight
Processing sample 69 with attribute Time
Processing sample 69 with attribute Released
Processing sample 70 with attribute Song_Name
Processing sample 70 with attribute Artist_Name
Processing sample 70 with attribute Album_Name
Processing sample 70 with attribute Genre
Processing sample 70 with attribute Price
Processing sample 70 with attribute CopyRight
Processing sample 70 with attribute Time
Processing sample 70 with attribute Released
Processing sample 71 with attribute Song_Name
Processing sample 71 with attribute Artist_Name
Processing sample 71 with attribute Album_Name
Processing sample 71 with attribute Genre
Processing sample 71 with attribute Price
Processing sample 71 with attribute CopyRight
Processing sample 71 with attribute Time
Processing sample 71 with attribute Released
Processing sample 72 wi

Processing sample 92 with attribute Album_Name
Processing sample 92 with attribute Genre
Processing sample 92 with attribute Price
Processing sample 92 with attribute CopyRight
Processing sample 92 with attribute Time
Processing sample 92 with attribute Released
Processing sample 93 with attribute Song_Name
Processing sample 93 with attribute Artist_Name
Processing sample 93 with attribute Album_Name
Processing sample 93 with attribute Genre
Processing sample 93 with attribute Price
Processing sample 93 with attribute CopyRight
Processing sample 93 with attribute Time
Processing sample 93 with attribute Released
Processing sample 94 with attribute Song_Name
Processing sample 94 with attribute Artist_Name
Processing sample 94 with attribute Album_Name
Processing sample 94 with attribute Genre
Processing sample 94 with attribute Price
Processing sample 94 with attribute CopyRight
Processing sample 94 with attribute Time
Processing sample 94 with attribute Released
Processing sample 95 wi

Processing sample 115 with attribute Song_Name
Processing sample 115 with attribute Artist_Name
Processing sample 115 with attribute Album_Name
Processing sample 115 with attribute Genre
Processing sample 115 with attribute Price
Processing sample 115 with attribute CopyRight
Processing sample 115 with attribute Time
Processing sample 115 with attribute Released
Processing sample 116 with attribute Song_Name
Processing sample 116 with attribute Artist_Name
Processing sample 116 with attribute Album_Name
Processing sample 116 with attribute Genre
Processing sample 116 with attribute Price
Processing sample 116 with attribute CopyRight
Processing sample 116 with attribute Time
Processing sample 116 with attribute Released
Processing sample 117 with attribute Song_Name
Processing sample 117 with attribute Artist_Name
Processing sample 117 with attribute Album_Name
Processing sample 117 with attribute Genre
Processing sample 117 with attribute Price
Processing sample 117 with attribute Cop

Processing sample 137 with attribute Price
Processing sample 137 with attribute CopyRight
Processing sample 137 with attribute Time
Processing sample 137 with attribute Released
Processing sample 138 with attribute Song_Name
Processing sample 138 with attribute Artist_Name
Processing sample 138 with attribute Album_Name
Processing sample 138 with attribute Genre
Processing sample 138 with attribute Price
Processing sample 138 with attribute CopyRight
Processing sample 138 with attribute Time
Processing sample 138 with attribute Released
Processing sample 139 with attribute Song_Name
Processing sample 139 with attribute Artist_Name
Processing sample 139 with attribute Album_Name
Processing sample 139 with attribute Genre
Processing sample 139 with attribute Price
Processing sample 139 with attribute CopyRight
Processing sample 139 with attribute Time
Processing sample 139 with attribute Released
Processing sample 140 with attribute Song_Name
Processing sample 140 with attribute Artist_N

Processing sample 160 with attribute Song_Name
Processing sample 160 with attribute Artist_Name
Processing sample 160 with attribute Album_Name
Processing sample 160 with attribute Genre
Processing sample 160 with attribute Price
Processing sample 160 with attribute CopyRight
Processing sample 160 with attribute Time
Processing sample 160 with attribute Released
Processing sample 161 with attribute Song_Name
Processing sample 161 with attribute Artist_Name
Processing sample 161 with attribute Album_Name
Processing sample 161 with attribute Genre
Processing sample 161 with attribute Price
Processing sample 161 with attribute CopyRight
Processing sample 161 with attribute Time
Processing sample 161 with attribute Released
Processing sample 162 with attribute Song_Name
Processing sample 162 with attribute Artist_Name
Processing sample 162 with attribute Album_Name
Processing sample 162 with attribute Genre
Processing sample 162 with attribute Price
Processing sample 162 with attribute Cop

Processing sample 183 with attribute Artist_Name
Processing sample 183 with attribute Album_Name
Processing sample 183 with attribute Genre
Processing sample 183 with attribute Price
Processing sample 183 with attribute CopyRight
Processing sample 183 with attribute Time
Processing sample 183 with attribute Released
Processing sample 184 with attribute Song_Name
Processing sample 184 with attribute Artist_Name
Processing sample 184 with attribute Album_Name
Processing sample 184 with attribute Genre
Processing sample 184 with attribute Price
Processing sample 184 with attribute CopyRight
Processing sample 184 with attribute Time
Processing sample 184 with attribute Released
Processing sample 185 with attribute Song_Name
Processing sample 185 with attribute Artist_Name
Processing sample 185 with attribute Album_Name
Processing sample 185 with attribute Genre
Processing sample 185 with attribute Price
Processing sample 185 with attribute CopyRight
Processing sample 185 with attribute Tim

Processing sample 205 with attribute CopyRight
Processing sample 205 with attribute Time
Processing sample 205 with attribute Released
Processing sample 206 with attribute Song_Name
Processing sample 206 with attribute Artist_Name
Processing sample 206 with attribute Album_Name
Processing sample 206 with attribute Genre
Processing sample 206 with attribute Price
Processing sample 206 with attribute CopyRight
Processing sample 206 with attribute Time
Processing sample 206 with attribute Released
Processing sample 207 with attribute Song_Name
Processing sample 207 with attribute Artist_Name
Processing sample 207 with attribute Album_Name
Processing sample 207 with attribute Genre
Processing sample 207 with attribute Price
Processing sample 207 with attribute CopyRight
Processing sample 207 with attribute Time
Processing sample 207 with attribute Released
Processing sample 208 with attribute Song_Name
Processing sample 208 with attribute Artist_Name
Processing sample 208 with attribute Al

Processing sample 228 with attribute Artist_Name
Processing sample 228 with attribute Album_Name
Processing sample 228 with attribute Genre
Processing sample 228 with attribute Price
Processing sample 228 with attribute CopyRight
Processing sample 228 with attribute Time
Processing sample 228 with attribute Released
Processing sample 229 with attribute Song_Name
Processing sample 229 with attribute Artist_Name
Processing sample 229 with attribute Album_Name
Processing sample 229 with attribute Genre
Processing sample 229 with attribute Price
Processing sample 229 with attribute CopyRight
Processing sample 229 with attribute Time
Processing sample 229 with attribute Released
Processing sample 230 with attribute Song_Name
Processing sample 230 with attribute Artist_Name
Processing sample 230 with attribute Album_Name
Processing sample 230 with attribute Genre
Processing sample 230 with attribute Price
Processing sample 230 with attribute CopyRight
Processing sample 230 with attribute Tim

Processing sample 250 with attribute CopyRight
Processing sample 250 with attribute Time
Processing sample 250 with attribute Released
Processing sample 251 with attribute Song_Name
Processing sample 251 with attribute Artist_Name
Processing sample 251 with attribute Album_Name
Processing sample 251 with attribute Genre
Processing sample 251 with attribute Price
Processing sample 251 with attribute CopyRight
Processing sample 251 with attribute Time
Processing sample 251 with attribute Released
Processing sample 252 with attribute Song_Name
Processing sample 252 with attribute Artist_Name
Processing sample 252 with attribute Album_Name
Processing sample 252 with attribute Genre
Processing sample 252 with attribute Price
Processing sample 252 with attribute CopyRight
Processing sample 252 with attribute Time
Processing sample 252 with attribute Released
Processing sample 253 with attribute Song_Name
Processing sample 253 with attribute Artist_Name
Processing sample 253 with attribute Al

Processing sample 273 with attribute Artist_Name
Processing sample 273 with attribute Album_Name
Processing sample 273 with attribute Genre
Processing sample 273 with attribute Price
Processing sample 273 with attribute CopyRight
Processing sample 273 with attribute Time
Processing sample 273 with attribute Released
Processing sample 274 with attribute Song_Name
Processing sample 274 with attribute Artist_Name
Processing sample 274 with attribute Album_Name
Processing sample 274 with attribute Genre
Processing sample 274 with attribute Price
Processing sample 274 with attribute CopyRight
Processing sample 274 with attribute Time
Processing sample 274 with attribute Released
Processing sample 275 with attribute Song_Name
Processing sample 275 with attribute Artist_Name
Processing sample 275 with attribute Album_Name
Processing sample 275 with attribute Genre
Processing sample 275 with attribute Price
Processing sample 275 with attribute CopyRight
Processing sample 275 with attribute Tim

Processing sample 296 with attribute Artist_Name
Processing sample 296 with attribute Album_Name
Processing sample 296 with attribute Genre
Processing sample 296 with attribute Price
Processing sample 296 with attribute CopyRight
Processing sample 296 with attribute Time
Processing sample 296 with attribute Released
Processing sample 297 with attribute Song_Name
Processing sample 297 with attribute Artist_Name
Processing sample 297 with attribute Album_Name
Processing sample 297 with attribute Genre
Processing sample 297 with attribute Price
Processing sample 297 with attribute CopyRight
Processing sample 297 with attribute Time
Processing sample 297 with attribute Released
Processing sample 298 with attribute Song_Name
Processing sample 298 with attribute Artist_Name
Processing sample 298 with attribute Album_Name
Processing sample 298 with attribute Genre
Processing sample 298 with attribute Price
Processing sample 298 with attribute CopyRight
Processing sample 298 with attribute Tim

Processing sample 318 with attribute CopyRight
Processing sample 318 with attribute Time
Processing sample 318 with attribute Released
Processing sample 319 with attribute Song_Name
Processing sample 319 with attribute Artist_Name
Processing sample 319 with attribute Album_Name
Processing sample 319 with attribute Genre
Processing sample 319 with attribute Price
Processing sample 319 with attribute CopyRight
Processing sample 319 with attribute Time
Processing sample 319 with attribute Released
Processing sample 320 with attribute Song_Name
Processing sample 320 with attribute Artist_Name
Processing sample 320 with attribute Album_Name
Processing sample 320 with attribute Genre
Processing sample 320 with attribute Price
Processing sample 320 with attribute CopyRight
Processing sample 320 with attribute Time
Processing sample 320 with attribute Released
Processing sample 321 with attribute Song_Name
Processing sample 321 with attribute Artist_Name
Processing sample 321 with attribute Al

Processing sample 341 with attribute Artist_Name
Processing sample 341 with attribute Album_Name
Processing sample 341 with attribute Genre
Processing sample 341 with attribute Price
Processing sample 341 with attribute CopyRight
Processing sample 341 with attribute Time
Processing sample 341 with attribute Released
Processing sample 342 with attribute Song_Name
Processing sample 342 with attribute Artist_Name
Processing sample 342 with attribute Album_Name
Processing sample 342 with attribute Genre
Processing sample 342 with attribute Price
Processing sample 342 with attribute CopyRight
Processing sample 342 with attribute Time
Processing sample 342 with attribute Released
Processing sample 343 with attribute Song_Name
Processing sample 343 with attribute Artist_Name
Processing sample 343 with attribute Album_Name
Processing sample 343 with attribute Genre
Processing sample 343 with attribute Price
Processing sample 343 with attribute CopyRight
Processing sample 343 with attribute Tim

Processing sample 363 with attribute CopyRight
Processing sample 363 with attribute Time
Processing sample 363 with attribute Released
Processing sample 364 with attribute Song_Name
Processing sample 364 with attribute Artist_Name
Processing sample 364 with attribute Album_Name
Processing sample 364 with attribute Genre
Processing sample 364 with attribute Price
Processing sample 364 with attribute CopyRight
Processing sample 364 with attribute Time
Processing sample 364 with attribute Released
Processing sample 365 with attribute Song_Name
Processing sample 365 with attribute Artist_Name
Processing sample 365 with attribute Album_Name
Processing sample 365 with attribute Genre
Processing sample 365 with attribute Price
Processing sample 365 with attribute CopyRight
Processing sample 365 with attribute Time
Processing sample 365 with attribute Released
Processing sample 366 with attribute Song_Name
Processing sample 366 with attribute Artist_Name
Processing sample 366 with attribute Al

Processing sample 386 with attribute Artist_Name
Processing sample 386 with attribute Album_Name
Processing sample 386 with attribute Genre
Processing sample 386 with attribute Price
Processing sample 386 with attribute CopyRight
Processing sample 386 with attribute Time
Processing sample 386 with attribute Released
Processing sample 387 with attribute Song_Name
Processing sample 387 with attribute Artist_Name
Processing sample 387 with attribute Album_Name
Processing sample 387 with attribute Genre
Processing sample 387 with attribute Price
Processing sample 387 with attribute CopyRight
Processing sample 387 with attribute Time
Processing sample 387 with attribute Released
Processing sample 388 with attribute Song_Name
Processing sample 388 with attribute Artist_Name
Processing sample 388 with attribute Album_Name
Processing sample 388 with attribute Genre
Processing sample 388 with attribute Price
Processing sample 388 with attribute CopyRight
Processing sample 388 with attribute Tim

In [50]:
ri_norms_negative_samples = []
for ri_list in negatives_ri_matrix:
    ri_norms_negative_samples.append(list(map(lambda x:torch.norm(x).data[0],ri_list)))

In [51]:
import pandas as pd
variation_df = pd.DataFrame(data= ri_norms_negative_samples,columns=attributes)

In [53]:
variation_df.to_csv('experiments-results/exp6/negatives_ri.csv',index=False)

In [55]:
attribute_lenght= len(attributes)
positive_samples_ri_matrix = []
current_sample = 0

for batch in positive_classifier_inputs:
    for index in range(len(batch)):
        current_sample_ris = []
        for j,attribute in enumerate(attributes):
            print('Processing sample {} with attribute {}'.format(current_sample,attribute))
            it,variation = find_smallest_variation_to_change(model.classifier,batch,index,j,0)
            current_sample_ris.append(variation)
        positive_samples_ri_matrix.append(current_sample_ris)
        current_sample+=1

Processing sample 0 with attribute Song_Name
Processing sample 0 with attribute Artist_Name
Processing sample 0 with attribute Album_Name
Processing sample 0 with attribute Genre
Processing sample 0 with attribute Price
Processing sample 0 with attribute CopyRight
Processing sample 0 with attribute Time
Processing sample 0 with attribute Released
Processing sample 1 with attribute Song_Name
Processing sample 1 with attribute Artist_Name
Processing sample 1 with attribute Album_Name
Processing sample 1 with attribute Genre
Processing sample 1 with attribute Price
Processing sample 1 with attribute CopyRight
Processing sample 1 with attribute Time
Processing sample 1 with attribute Released
Processing sample 2 with attribute Song_Name
Processing sample 2 with attribute Artist_Name
Processing sample 2 with attribute Album_Name
Processing sample 2 with attribute Genre
Processing sample 2 with attribute Price
Processing sample 2 with attribute CopyRight
Processing sample 2 with attribute Ti

Processing sample 23 with attribute Album_Name
Processing sample 23 with attribute Genre
Processing sample 23 with attribute Price
Processing sample 23 with attribute CopyRight
Processing sample 23 with attribute Time
Processing sample 23 with attribute Released
Processing sample 24 with attribute Song_Name
Processing sample 24 with attribute Artist_Name
Processing sample 24 with attribute Album_Name
Processing sample 24 with attribute Genre
Processing sample 24 with attribute Price
Processing sample 24 with attribute CopyRight
Processing sample 24 with attribute Time
Processing sample 24 with attribute Released
Processing sample 25 with attribute Song_Name
Processing sample 25 with attribute Artist_Name
Processing sample 25 with attribute Album_Name
Processing sample 25 with attribute Genre
Processing sample 25 with attribute Price
Processing sample 25 with attribute CopyRight
Processing sample 25 with attribute Time
Processing sample 25 with attribute Released
Processing sample 26 wi

Processing sample 46 with attribute Album_Name
Processing sample 46 with attribute Genre
Processing sample 46 with attribute Price
Processing sample 46 with attribute CopyRight
Processing sample 46 with attribute Time
Processing sample 46 with attribute Released
Processing sample 47 with attribute Song_Name
Processing sample 47 with attribute Artist_Name
Processing sample 47 with attribute Album_Name
Processing sample 47 with attribute Genre
Processing sample 47 with attribute Price
Processing sample 47 with attribute CopyRight
Processing sample 47 with attribute Time
Processing sample 47 with attribute Released
Processing sample 48 with attribute Song_Name
Processing sample 48 with attribute Artist_Name
Processing sample 48 with attribute Album_Name
Processing sample 48 with attribute Genre
Processing sample 48 with attribute Price
Processing sample 48 with attribute CopyRight
Processing sample 48 with attribute Time
Processing sample 48 with attribute Released
Processing sample 49 wi

Processing sample 69 with attribute Album_Name
Processing sample 69 with attribute Genre
Processing sample 69 with attribute Price
Processing sample 69 with attribute CopyRight
Processing sample 69 with attribute Time
Processing sample 69 with attribute Released
Processing sample 70 with attribute Song_Name
Processing sample 70 with attribute Artist_Name
Processing sample 70 with attribute Album_Name
Processing sample 70 with attribute Genre
Processing sample 70 with attribute Price
Processing sample 70 with attribute CopyRight
Processing sample 70 with attribute Time
Processing sample 70 with attribute Released
Processing sample 71 with attribute Song_Name
Processing sample 71 with attribute Artist_Name
Processing sample 71 with attribute Album_Name
Processing sample 71 with attribute Genre
Processing sample 71 with attribute Price
Processing sample 71 with attribute CopyRight
Processing sample 71 with attribute Time
Processing sample 71 with attribute Released
Processing sample 72 wi

Processing sample 92 with attribute Album_Name
Processing sample 92 with attribute Genre
Processing sample 92 with attribute Price
Processing sample 92 with attribute CopyRight
Processing sample 92 with attribute Time
Processing sample 92 with attribute Released
Processing sample 93 with attribute Song_Name
Processing sample 93 with attribute Artist_Name
Processing sample 93 with attribute Album_Name
Processing sample 93 with attribute Genre
Processing sample 93 with attribute Price
Processing sample 93 with attribute CopyRight
Processing sample 93 with attribute Time
Processing sample 93 with attribute Released
Processing sample 94 with attribute Song_Name
Processing sample 94 with attribute Artist_Name
Processing sample 94 with attribute Album_Name
Processing sample 94 with attribute Genre
Processing sample 94 with attribute Price
Processing sample 94 with attribute CopyRight
Processing sample 94 with attribute Time
Processing sample 94 with attribute Released
Processing sample 95 wi

Processing sample 115 with attribute Song_Name
Processing sample 115 with attribute Artist_Name
Processing sample 115 with attribute Album_Name
Processing sample 115 with attribute Genre
Processing sample 115 with attribute Price
Processing sample 115 with attribute CopyRight
Processing sample 115 with attribute Time
Processing sample 115 with attribute Released
Processing sample 116 with attribute Song_Name
Processing sample 116 with attribute Artist_Name
Processing sample 116 with attribute Album_Name
Processing sample 116 with attribute Genre
Processing sample 116 with attribute Price
Processing sample 116 with attribute CopyRight
Processing sample 116 with attribute Time
Processing sample 116 with attribute Released
Processing sample 117 with attribute Song_Name
Processing sample 117 with attribute Artist_Name
Processing sample 117 with attribute Album_Name
Processing sample 117 with attribute Genre
Processing sample 117 with attribute Price
Processing sample 117 with attribute Cop

In [56]:
ri_norms = []
for ri_list in positive_samples_ri_matrix:
    ri_norms.append(list(map(lambda x:torch.norm(x).data[0],ri_list)))

In [57]:
import pandas as pd
variation_df = pd.DataFrame(data= ri_norms,columns=attributes)
variation_df.head(10)

Unnamed: 0,Song_Name,Artist_Name,Album_Name,Genre,Price,CopyRight,Time,Released
0,0.865432,2.245889,1.374159,2.525168,2.80603,2.623068,0.688157,2.527538
1,4.221435,10.450848,6.253699,10.364573,10.068408,11.150321,3.325911,10.685292
2,6.366452,19.075026,11.094864,17.653036,16.906139,19.103495,4.998214,18.701216
3,1.054276,2.558052,1.565852,2.698159,2.743334,2.811986,0.838619,2.670394
4,2.518672,6.65987,4.038453,7.446851,8.227221,7.826386,2.036325,7.609153
5,7.572473,20.7148,11.991455,19.615816,19.509956,21.74861,5.844666,20.953079
6,1.59439,4.246817,2.585542,4.830194,5.528861,5.039536,1.268687,4.894966
7,6.635593,17.792227,10.411633,17.360876,17.062279,19.233479,5.174418,18.353481
8,3.599021,8.273602,4.978783,7.908396,7.51279,8.429854,2.87165,8.140283
9,3.031365,7.590149,4.538692,7.770656,7.808692,8.228191,2.356901,7.76858


In [58]:
variation_df.to_csv('experiments-results/exp6/positives_ri.csv',index=False)