In [1]:
from nmt_model import NMT
import torch
params = torch.load('model_0707.bin', map_location=lambda storage, loc: storage)
args = params['args']
model = NMT(vocab=params['vocab'], **args)
model.load_state_dict(params['state_dict'])

torch.save(params['state_dict'], 'model_bi_0707')

In [2]:
params.keys()

dict_keys(['args', 'vocab', 'state_dict'])

In [3]:
params['vocab'].vocs['king'], params['vocab'].vocs['man'], params['vocab'].vocs['queen'], params['vocab'].vocs['woman']

(553, 198, 2850, 593)

In [12]:
aa = torch.tensor([1,3,2,4])
aa.sort(0,descending=True)

(tensor([4, 3, 2, 1]), tensor([3, 1, 2, 0]))

In [3]:
%%writefile net_util.py

from tqdm.notebook import tqdm

def get_nn_avg_dist(emb, query, knn):
    """
    Compute the average distance of the `knn` nearest neighbors
    for a given set of embeddings and queries.
    Use Faiss if available.
    """
    FAISS_AVAILABLE = False
    
    if FAISS_AVAILABLE:
        emb = emb.cpu().numpy()
        query = query.cpu().numpy()
        if hasattr(faiss, 'StandardGpuResources'):
            # gpu mode
            res = faiss.StandardGpuResources()
            config = faiss.GpuIndexFlatConfig()
            config.device = 0
            index = faiss.GpuIndexFlatIP(res, emb.shape[1], config)
        else:
            # cpu mode
            index = faiss.IndexFlatIP(emb.shape[1])
        index.add(emb)
        distances, _ = index.search(query, knn)
        return distances.mean(1)
    else:
        bs = 1024
        all_distances = []
        emb = emb.transpose(0, 1).contiguous()
        for i in range(0, query.shape[0], bs):
            distances = query[i:i + bs].mm(emb)
            best_distances, _ = distances.topk(knn, dim=1, largest=True, sorted=True)
            all_distances.append(best_distances.mean(1).cpu())
        all_distances = torch.cat(all_distances)
        return all_distances


def get_candidates(emb1, emb2, params):
    """
    Get best translation pairs candidates.
    """

    bs = 128

    all_scores = []
    all_targets = []

    # number of source words to consider
    n_src = emb1.size(0)
    if params['dico_max_rank'] > 0 and not params['dico_method'].startswith('invsm_beta_'):
        n_src = min(params['dico_max_rank'], n_src)


    # contextual dissimilarity measure
    if params['dico_method'].startswith('csls_knn_'):

        knn = params['dico_method'][len('csls_knn_'):]
        assert knn.isdigit()
        knn = int(knn)

        # average distances to k nearest neighbors
        average_dist1 = get_nn_avg_dist(emb2, emb1, knn)
        average_dist2 = get_nn_avg_dist(emb1, emb2, knn)
        #average_dist1 = torch.from_numpy(get_nn_avg_dist(emb2, emb1, knn))
        #average_dist2 = torch.from_numpy(get_nn_avg_dist(emb1, emb2, knn))
        #print('check_point_1')
        #average_dist1 = average_dist1.type_as(emb1)
        #print('check_point_2')
        #average_dist2 = average_dist2.type_as(emb2)

        # for every source word
        for i in tqdm(range(0, n_src, bs)):

            # compute target words scores
            scores = emb2.mm(emb1[i:min(n_src, i + bs)].transpose(0, 1)).transpose(0, 1)
            scores.mul_(2)
            scores.sub_(average_dist1[i:min(n_src, i + bs)].unsqueeze(1).expand_as(scores) 
                        + average_dist2.unsqueeze(0).expand_as(scores))
            best_scores, best_targets = scores.topk(2, dim=1, largest=True, sorted=True)

            # update scores / potential targets
            all_scores.append(best_scores.cpu())
            all_targets.append(best_targets.cpu())

        all_scores = torch.cat(all_scores, 0)
        all_targets = torch.cat(all_targets, 0)

    all_pairs = torch.cat([
        torch.arange(0, all_targets.size(0)).long().unsqueeze(1),
        all_targets[:, 0].unsqueeze(1)
    ], 1)

    # sanity check
    assert all_scores.size() == all_pairs.size() == (n_src, 2)

    # sort pairs by score confidence
    diff = all_scores[:, 0] - all_scores[:, 1]
    reordered = diff.sort(0, descending=True)[1]
    #reordered = all_scores.sort(0, descending=True)[1]
    all_scores = all_scores[reordered]
    all_pairs = all_pairs[reordered]
    """
    # max dico words rank
    if params['dico_max_rank'] > 0:
        selected = all_pairs.max(1)[0] <= params['dico_max_rank']
        mask = selected.unsqueeze(1).expand_as(all_scores).clone()
        all_scores = all_scores.masked_select(mask).view(-1, 2)
        all_pairs = all_pairs.masked_select(mask).view(-1, 2)

    # max dico size
    if params['dico_max_size'] > 0:
        all_scores = all_scores[:params['dico_max_size']]
        all_pairs = all_pairs[:params['dico_max_size']]
    
    # min dico size
    diff = all_scores[:, 0] - all_scores[:, 1]
    if params['dico_min_size'] > 0:
        diff[:params['dico_min_size']] = 1e9

    # confidence threshold
    if params['dico_threshold'] > 0:
        mask = diff > params['dico_threshold']
        logger.info("Selected %i / %i pairs above the confidence threshold." % (mask.sum(), diff.size(0)))
        mask = mask.unsqueeze(1).expand_as(all_pairs).clone()
        all_pairs = all_pairs.masked_select(mask).view(-1, 2)
    """
    return all_pairs


def build_dictionary(src_emb, tgt_emb, params, s2t_candidates=None, t2s_candidates=None):
    """
    Build a training dictionary given current embeddings / mapping.
    """
    #logger.info("Building the train dictionary ...")
    s2t = 'S2T' in params['dico_build']
    t2s = 'T2S' in params['dico_build']
    assert s2t or t2s

    if s2t:
        if s2t_candidates is None:
            s2t_candidates = get_candidates(src_emb, tgt_emb, params)
    if t2s:
        if t2s_candidates is None:
            t2s_candidates = get_candidates(tgt_emb, src_emb, params)
        t2s_candidates = torch.cat([t2s_candidates[:, 1:], t2s_candidates[:, :1]], 1)

    if params['dico_build'] == 'S2T':
        dico = s2t_candidates
    elif params['dico_build'] == 'T2S':
        dico = t2s_candidates
    else:
        s2t_candidates = set([(a, b) for a, b in s2t_candidates.numpy()])
        t2s_candidates = set([(a, b) for a, b in t2s_candidates.numpy()])
        if params['dico_build'] == 'S2T|T2S':
            final_pairs = s2t_candidates | t2s_candidates
        else:
            assert params['dico_build'] == 'S2T&T2S'
            final_pairs = s2t_candidates & t2s_candidates
            if len(final_pairs) == 0:
                #logger.warning("Empty intersection ...")
                return None
        dico = torch.LongTensor(list([[int(a), int(b)] for (a, b) in final_pairs]))

    #logger.info('New train dictionary of %i pairs.' % dico.size(0))
    return dico   #.cuda() if params['cuda else dico']

def dict_merge(d1,d2):
    for k,v in d1.items():
        if k in d2.keys():
            d2[k] += v
        else:
            d2[k] = v

        
def save_best(self, to_log, metric):
    """
    Save the best model for the given validation metric.
    """
    # best mapping for the given validation criterion
    if to_log[metric] > self.best_valid_metric:
        # new best mapping
        self.best_valid_metric = to_log[metric]
        logger.info('* Best value for "%s": %.5f' % (metric, to_log[metric]))
        # save the mapping
        W = self.mapping.weight.data.cpu().numpy()
        path = os.path.join(self.params['exp_path'], 'best_mapping.pth')
        logger.info('* Saving the mapping to %s ...' % path)
        torch.save(W, path)


Writing net_util.py


In [163]:
aa = torch.arange(12).view(3,-1)
aa.max(1)
selected = aa.max(1)[0] <= 8
selected

tensor([1, 1, 0], dtype=torch.uint8)

In [168]:
mask = selected.unsqueeze(1).expand_as(aa).clone()
print(mask,aa)
aa.masked_select(mask).view(-1,4)

tensor([[1, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 0, 0]], dtype=torch.uint8) tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])


tensor([[0, 1, 2, 3],
        [4, 5, 6, 7]])

In [162]:
aa

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [136]:
aa = 'csls_knn_nnn'
aa.startswith('csls_knn_')

True

In [126]:
src_emb = net.mapping(net.emb[:net.ko_start])
src_emb.size()

torch.Size([54621, 300])

In [127]:
tgt_emb = net.mapping(net.emb[net.ko_start:])
tgt_emb.size()

torch.Size([47110, 300])

In [149]:
aa = torch.arange(12)
aa

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [153]:
torch.tensor([(1,2),(2,3),(3,5)])

tensor([[1, 2],
        [2, 3],
        [3, 5]])

In [155]:
torch.tensor([(1,2),(2,3),(3,5)])[:,0]

tensor([1, 2, 3])

In [152]:
net.emb.size()

torch.Size([101731, 300])

In [2]:
%%writefile net_model.py
import os
import math
import random
import numpy as np
import scipy
import scipy.linalg
import torch
import torch.nn as nn
#import torch.nn.functional as F

#from build_dict import get_candidates, build_dictionary

class Net(nn.Module): #Word Mapping

    def __init__(self, parameters, hidden_size, dropout_rate=0.2):
        super(Net, self).__init__()
        self.hidden_size = hidden_size
        self.ko_start = 54621
        #self.dropout_rate = dropout_rate
        #self.vocab = vocab
        #self.token = ['(', ')', ',', "'", '"','_','<s>','</s>']
        #self.sbol = ['_','^','`']
        self.top_k = 10
        self.some_number = 0.3
        self.best_valid_metric = 0
        self.vocs = parameters['vocab'].vocs
        self.emb = parameters['state_dict']['model_embeddings.vocabs.weight']
        self.lookup = {}
        
        self.mapping = None
        self.mapping = nn.Linear(self.hidden_size, self.hidden_size, bias=False)  
        

    def forward(self, bi_dict):
        
        if type(bi_dict[0][0]) is str:
            bi_dict = torch.tensor([(self.vocs[b[0]], self.vocs[b[1]]) for b in bi_dict])
        print("bi_dict.size : {}".format(bi_dict.size()))
        self.mapping.weight.data = self.procrustes(bi_dict)
        mean_cosine, dico = self.dist_mean_cosine()
        self.save_best(mean_cosine)
       
        return dico, mean_cosine
           
    
    def procrustes(self, dico):
        """
        Find the best orthogonal matrix mapping using the Orthogonal Procrustes problem
        https://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem
        """
        A = self.emb[dico[:, 0]]
        B = self.emb[dico[:, 1]]
        W = self.mapping.weight.data
        M = B.transpose(0, 1).mm(A).cpu().numpy()
        U, S, V_t = scipy.linalg.svd(M, full_matrices=True)
        W.copy_(torch.from_numpy(U.dot(V_t)).type_as(W)) 
        print("W.size : {}".format(W.size()))
        return W
        
    def dist_mean_cosine(self):
        """
        Mean-cosine model selection criterion.
        """
        n_wds = 20000
        # get normalized embeddings
        #src_emb = self.mapping(self.emb[:self.ko_start])
        #tgt_emb = self.emb[self.ko_start:]
        src_emb = self.mapping(self.emb[:n_wds])
        tgt_emb = self.emb[self.ko_start:self.ko_start+n_wds]
        src_emb = src_emb / src_emb.norm(2, 1, keepdim=True) #.expand_as(src_emb)
        tgt_emb = tgt_emb / tgt_emb.norm(2, 1, keepdim=True) #.expand_as(tgt_emb)

        # build dictionary
        for dico_method in ['csls_knn_10']:
            dico_build = 'S2T'
            dico_max_size = 30000
            # temp params / dictionary generation
            _params = {}
            #_params = deepcopy(self.params)
            _params['dico_method'] = dico_method # 'csls_knn_10'
            _params['dico_build'] = dico_build
            _params['dico_threshold'] = 0
            _params['dico_max_rank'] = 30000
            _params['dico_min_size'] = 0
            _params['dico_max_size'] = dico_max_size
            
            s2t_candidates = get_candidates(src_emb, tgt_emb, _params)
            t2s_candidates = get_candidates(tgt_emb, src_emb, _params)
            dico = build_dictionary(src_emb, tgt_emb, _params, s2t_candidates, t2s_candidates)
            # mean cosine
            if dico is None:
                mean_cosine = -1e9
            else:
                mean_cosine = (src_emb[dico[:dico_max_size, 0]] * tgt_emb[dico[:dico_max_size, 1]]).sum(1).mean()
            #mean_cosine = mean_cosine.item() if isinstance(mean_cosine, torch_tensor) else mean_cosine
            print("Mean cosine (%s method, %s build, %i max size): %.5f"
                        % (dico_method, _params['dico_build'], dico_max_size, mean_cosine))
            #to_log['mean_cosine-%s-%s-%i' % (dico_method, _params['dico_build'], dico_max_size)] = mean_cosine
            
        return mean_cosine, dico
        
    def save_best(self, metric):
        """
        Save the best model for the given validation metric.
        """
        # best mapping for the given validation criterion
        if metric > self.best_valid_metric:
            # new best mapping
            self.best_valid_metric = metric
            #logger.info('* Best value for "%s": %.5f' % (metric, to_log[metric]))
            # save the mapping
            W = self.mapping.weight.data.cpu().numpy()
            path = os.path.join('outputs/', 'best_mapping.pth')
            print('* Saving the mapping to %s ...' % path)
            torch.save(W, path)
    
    
    def make_lookup(self, src, tgt):
        for ws in src:
            scores, w_topk = torch.topk(cos_sim(self.w_map(self.emb[wD[ws]]).expand(batch,self.hidden_size),
                              embed[self.ws2inds(tgt)]), self.top_k)
            wt = w_topk[0]
            scores_sk, ws_topk = torch.topk(cos_sim(self.emb[wD[ws]].expand(batch,self.hidden_size),
                              embed[self.ws2inds(src)]), self.top_k)
            scores_tk, wt_topk = torch.topk(cos_sim(self.emb[wt].expand(batch,self.hidden_size),
                              embed[self.ws2inds(tgt)]), self.top_k)
            CSIL = 2* cos_sim(self.w_map(self.emb[wD[ws]]),self.emb[wt]) - (sum(scores_sk)+ sum(scores_tk)) / self.top_k
            if CSIL > self.some_number:
                self.lookup[ws].append((wt,CSIL))   
        return self.lookup
            
    def ws2inds(self, words):  
        return [self.vocs[w] for w in words]
    
    def cos_sim(self,a,b):
        return sum(a*b)/((sum(a*a)**.5)*(sum(b*b)**.5))

    
def batch_iter(bi_words,b_size):
    batch_size = b_size
    batch_num = math.ceil(len(bi_words) / batch_size)
    index_array = list(range(len(bi_words)))

    np.random.shuffle(index_array)

    for i in range(batch_num):
        indices = index_array[i * batch_size: (i + 1) * batch_size]
        examples = [bi_words[idx] for idx in indices]
        src = [e[0] for e in examples]
        tgt = [e[1] for e in examples]

        yield src, tgt

Writing net_model.py


In [142]:
aa ='a'
type(aa) is str

True

In [5]:
import json
path = '../../../preProject/NMT/preproc_Module/inputs/'

with open(path+'dict_enT.json','r') as f:
    dictE = json.load(f)
with open(path+'dict_koT.json','r') as f:
    dictK = json.load(f)
    
from nmt_model import NMT
import torch
params = torch.load('model_0707.bin', map_location=lambda storage, loc: storage)
args = params['args']
model = NMT(vocab=params['vocab'], **args)
model.load_state_dict(params['state_dict'])

torch.save(params['state_dict'], 'model_bi_0707')



In [53]:
from collections import Counter
bi_words = [(w,sorted(v, key=lambda x:x[1][0], reverse = True)[0][0]) for w,v in dictE.items()]
bi_words += [(sorted(v, key=lambda x:x[1][0], reverse = True)[0][0],w) for w,v in dictK.items()]
net = Net(params,300)
count = Counter()
best = 0
patience = 0
dictionary = torch.tensor([(net.vocs[b[0]], net.vocs[b[1]]) for b in bi_words])
dict =[(w[0],w[1]) for w in dictionary.numpy()]
dict_count = Counter(dict+dict)
dictionary = [w[0] for w in sorted(dict_count.items(), key=lambda x:x[1], reverse=True)[:5000]]



In [55]:
torch.tensor(dictionary).size()

torch.Size([5757, 2])

In [32]:
net.vocs['개인적']

56520

In [44]:
dictionary[:10]

[((605, 56520), 2),
 ((8336, 65100), 2),
 ((74, 53745), 2),
 ((58, 54886), 2),
 ((952, 55309), 2),
 ((486, 55226), 2),
 ((5420, 59024), 2),
 ((7833, 63406), 2),
 ((8827, 64383), 2),
 ((250, 54874), 2)]

In [35]:
len(Counter(list(dictionary)+list(dictionary)))

20700

In [37]:
dict =[(w[0],w[1]) for w in dictionary.numpy()]
len(Counter((dict+dict)))
      

10350

In [15]:
dict_count

count(tensor([[  1210, 113040],
        [ 16672, 130200],
        [   148, 107490],
        ...,
        [119892,  70600],
        [122294,  25284],
        [119374,  23838]]))

In [7]:
def dict_merge(d1,d2):
    for k,v in d1.items():
        if k in d2.keys():
            d2[k] += v
        else:
            d2[k] = v

In [4]:
%%writefile net_run.py

from collections import Counter
from itertools import chain

import json
path = '../../../preProject/NMT/preproc_Module/inputs/'

with open(path+'dict_enT.json','r') as f:
    dictE = json.load(f)
with open(path+'dict_koT.json','r') as f:
    dictK = json.load(f)
    
from nmt_model import NMT
import torch
params = torch.load('model_0707.bin', map_location=lambda storage, loc: storage)
args = params['args']
model = NMT(vocab=params['vocab'], **args)
model.load_state_dict(params['state_dict'])

torch.save(params['state_dict'], 'model_bi_0707')

bi_words = [(w,sorted(v, key=lambda x:x[1][0], reverse = True)[0][0]) for w,v in dictE.items()]
bi_words += [(sorted(v, key=lambda x:x[1][0], reverse = True)[0][0],w) for w,v in dictK.items()]
bi_words = list(set(bi_words))
net = Net(params,300)

best = 0
patience = 0
dict_count = {}

dictionary = torch.tensor([(net.vocs[b[0]], net.vocs[b[1]]) for b in bi_words[:3000]])
count = Counter([(w[0],w[1]) for w in dictionary.numpy()])
dict_merge(count,dict_count)

for i in range(10):
    
    pre_dic = dictionary    
    dictionary, m_cosine = net(dictionary)
    
    print("iteration {}, m_cosine = {}".format(i,m_cosine))
    
    if m_cosine > best:
        best = m_cosine
        patience = 0
    else:
        patience += 1
        print("patience : {}".format(patience))
        if patience >2:
            print("patience : {},  STOP Iteration".format(patience))            
            break

            
    dict_count = Counter(chain(*[[(w[0],w[1]) for w in dct.numpy()] for dct in [pre_dic, dictionary[:2000]]]))
    #count = Counter([(w[0],w[1]) for w in dictionary.numpy()])
    #dict_merge(count,dict_count)
    dictionary = torch.tensor([w[0] for w in sorted(dict_count.items(), key=lambda x:x[1], reverse=True)])

   
    
    

Writing net_run.py


In [71]:
len(dict_count)

12751

In [249]:
dictionary2 = net(dictionary)

bi_dict.size : torch.Size([6717, 2])
W.size : torch.Size([300, 300])


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


check_point_1
check_point_2


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


check_point_1
check_point_2


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))


Mean cosine (csls_knn_10 method, S2T build, 10000 max size): 0.24023


In [246]:
len(dictionary)

6717

In [12]:
bi_words[:10]

[('clon', '복제'),
 ('spanish', '스페인어'),
 ('fate', '운명'),
 ('jong-min', '김종민'),
 ('dynamic', '역동적'),
 ('bright', '밝'),
 ('lazarus', '나사'),
 ('auction', '옥션'),
 ('logo', '로고'),
 ('founder', '설립자')]

In [1]:
[(net.vocs.id2word[w[0].item()],net.vocs.id2word[w[1].item()+54621]) for w in dictionary[3000:3100]]

NameError: name 'dictionary' is not defined

In [230]:
net.vocs.id2word[8]

's'

In [133]:
import json
import argparse
parser = argparse.ArgumentParser(description='Supervised training')
parser.add_argument("--seed", type=int, default=-1, help="Initialization seed")
parser.add_argument("--verbose", type=int, default=2, help="Verbose level (2:debug, 1:info, 0:warning)")
parser.add_argument("--exp_path", type=str, default="", help="Where to store experiment logs and models")
parser.add_argument("--exp_name", type=str, default="debug", help="Experiment name")
parser.add_argument("--exp_id", type=str, default="", help="Experiment ID")
#parser.add_argument("--cuda", type=bool_flag, default=True, help="Run on GPU")
parser.add_argument("--export", type=str, default="txt", help="Export embeddings after training (txt / pth)")

# data
parser.add_argument("--src_lang", type=str, default='en', help="Source language")
parser.add_argument("--tgt_lang", type=str, default='es', help="Target language")
parser.add_argument("--emb_dim", type=int, default=300, help="Embedding dimension")
parser.add_argument("--max_vocab", type=int, default=200000, help="Maximum vocabulary size (-1 to disable)")
# training refinement
parser.add_argument("--n_refinement", type=int, default=5, help="Number of refinement iterations (0 to disable the refinement procedure)")
# dictionary creation parameters (for refinement)
parser.add_argument("--dico_train", type=str, default="default", help="Path to training dictionary (default: use identical character strings)")
parser.add_argument("--dico_eval", type=str, default="default", help="Path to evaluation dictionary")
parser.add_argument("--dico_method", type=str, default='csls_knn_10', help="Method used for dictionary generation (nn/invsm_beta_30/csls_knn_10)")
parser.add_argument("--dico_build", type=str, default='S2T&T2S', help="S2T,T2S,S2T|T2S,S2T&T2S")
parser.add_argument("--dico_threshold", type=float, default=0, help="Threshold confidence for dictionary generation")
parser.add_argument("--dico_max_rank", type=int, default=10000, help="Maximum dictionary words rank (0 to disable)")
parser.add_argument("--dico_min_size", type=int, default=0, help="Minimum generated dictionary size (0 to disable)")
parser.add_argument("--dico_max_size", type=int, default=0, help="Maximum generated dictionary size (0 to disable)")
# reload pre-trained embeddings
parser.add_argument("--src_emb", type=str, default='', help="Reload source embeddings")
parser.add_argument("--tgt_emb", type=str, default='', help="Reload target embeddings")
parser.add_argument("--normalize_embeddings", type=str, default="", help="Normalize embeddings before training")


# parse parameters
#params_added = parser.parse_args()
params_added = parser.parse_args()

usage: ipykernel_launcher.py [-h] [--seed SEED] [--verbose VERBOSE]
                             [--exp_path EXP_PATH] [--exp_name EXP_NAME]
                             [--exp_id EXP_ID] [--export EXPORT]
                             [--src_lang SRC_LANG] [--tgt_lang TGT_LANG]
                             [--emb_dim EMB_DIM] [--max_vocab MAX_VOCAB]
                             [--n_refinement N_REFINEMENT]
                             [--dico_train DICO_TRAIN] [--dico_eval DICO_EVAL]
                             [--dico_method DICO_METHOD]
                             [--dico_build DICO_BUILD]
                             [--dico_threshold DICO_THRESHOLD]
                             [--dico_max_rank DICO_MAX_RANK]
                             [--dico_min_size DICO_MIN_SIZE]
                             [--dico_max_size DICO_MAX_SIZE]
                             [--src_emb SRC_EMB] [--tgt_emb TGT_EMB]
                             [--normalize_embeddings NORMALIZE_EMBEDDINGS]
ipykerne

SystemExit: 2

In [116]:
print([net.vocs.id2word[i] for i in range(54621,54700)])

['을', '는', '에', '은', '를', 'ㄴ', '가', '한', '고', '있', '로', '으로', '에서', '년', '들', '과', '했', '그', '일', '하는', '해', '것', 'ㅓㅆ', '도', '나', '하였', '월', '한다', '되었', 'ㅓ', 'ㄹ', '게', '등', '된', '들이', '대', '습니다', '사람', '하여', '며', 'ㅂ니다', '에게', '면', '에는', '것이', '지만', '히', '으며', '되', '때', '었다', '말', 'ㅆ', '없', '어', '우리', '다고', '스', '자', '만', '하게', '던', '라고', '많', '그러', '까지', '라', '명', '중', '같', '된다', '주', '제', '다는', '더', '대한', '하기', '되어', '라는']


In [118]:
net.emb[54621:54623]

tensor([[-3.9603e-02,  1.4521e-01, -1.2104e-01, -1.1321e-01, -1.4293e-01,
          2.2628e-02, -1.1703e-01, -1.5021e-02, -1.4876e-01,  7.0379e-02,
         -1.7221e-01,  1.3113e-01,  1.3274e-01,  1.0571e-01, -1.6291e-01,
          5.0270e-02, -2.5549e-01,  7.2935e-02,  1.6524e-01, -3.7083e-01,
          7.4613e-02, -4.9522e-02, -3.1357e-01, -5.8221e-02,  1.7784e-01,
          2.1596e-01,  4.0310e-01,  1.7364e-02,  4.9810e-02,  3.0469e-03,
          2.2192e-02, -5.3297e-01, -7.7074e-02, -5.9651e-02,  5.1684e-01,
          9.2652e-02,  2.2629e-01,  3.4512e-02, -5.8818e-01, -3.2606e-02,
         -6.0538e-02,  1.1353e-01, -1.2166e-01, -3.1682e-02,  1.1525e-01,
          1.1659e-01,  2.1780e-01, -4.6682e-02, -1.2379e-01, -1.0832e-01,
         -5.6783e-03,  4.9250e-02, -1.9953e-01,  1.5720e-01,  2.0204e-01,
          1.8510e-01,  3.1060e-01, -1.9300e-01,  3.2811e-01, -1.3091e-01,
          1.2648e-01, -3.1665e-01,  9.6038e-02, -1.1138e-01,  2.4818e-02,
          2.4642e-02,  2.2964e-01, -3.

In [None]:
import numpy as np
from tqdm.notebook import tqdm

def cos_sim(a,b):
    return sum(a*b)/((sum(a*a)**.5)*(sum(b*b)**.5))

wD = params['vocab'].vocs
emb = params['state_dict']['model_embeddings.vocabs.weight']
w = "man men woman women".split(' ')
mapped = emb[wD[w[0]]] - emb[wD[w[2]]] + emb[wD[w[3]]]
aa = torch.tensor([cos_sim(emb[i],mapped) for i in tqdm(range(3,10000))])  

aa.size(), cos_sim(emb[wD['queen']],emb[wD['queen']])
                
    

In [69]:
scipy.spatial.distance.cosine(src[0],tgt[0])

AttributeError: module 'scipy' has no attribute 'spatial'

In [6]:
import json
path = '../../../preProject/NMT/preproc_Module/inputs/'

with open(path+'dict_enT.json','r') as f:
    dictE = json.load(f)
with open(path+'dict_koT.json','r') as f:
    dictK = json.load(f)

In [137]:
net = Net(params, 300)
list(net.parameters())

[Parameter containing:
 tensor([[ 0.0367,  0.0331, -0.0166,  ..., -0.0038,  0.0020,  0.0333],
         [ 0.0051, -0.0550,  0.0519,  ...,  0.0196, -0.0466,  0.0145],
         [-0.0470,  0.0396,  0.0391,  ...,  0.0435, -0.0558, -0.0068],
         ...,
         [-0.0227, -0.0017,  0.0534,  ...,  0.0348, -0.0530, -0.0424],
         [ 0.0522, -0.0373, -0.0069,  ..., -0.0184,  0.0200, -0.0478],
         [-0.0366, -0.0521, -0.0287,  ..., -0.0114,  0.0231, -0.0196]],
        requires_grad=True)]

In [14]:
list(net.parameters())

[Parameter containing:
 tensor([[-0.0448, -0.0223,  0.0343,  ..., -0.0447, -0.0565, -0.0145],
         [ 0.0219, -0.0308, -0.0284,  ...,  0.0432,  0.0363,  0.0151],
         [ 0.0056,  0.0464, -0.0525,  ..., -0.0214, -0.0279,  0.0573],
         ...,
         [-0.0279,  0.0163,  0.0007,  ..., -0.0255, -0.0531, -0.0169],
         [ 0.0162,  0.0311, -0.0370,  ...,  0.0029,  0.0508, -0.0155],
         [-0.0564, -0.0338, -0.0108,  ..., -0.0028, -0.0249, -0.0358]],
        requires_grad=True)]

In [19]:
bi_words = [(w,sorted(v, key=lambda x:x[1][0], reverse = True)[0][0]) for w,v in dictE.items()]
bi_words += [(sorted(v, key=lambda x:x[1][0], reverse = True)[0][0],w) for w,v in dictE.items()]
net = Net(params, bi_words, 300)
net(['president'],['대통령']), net(['personal'],['개인']), net(['china'],['중국']), net(['korea'],['한국'])

(tensor(0.3189, grad_fn=<L1LossBackward>),
 tensor(0.3399, grad_fn=<L1LossBackward>),
 tensor(0.3080, grad_fn=<L1LossBackward>),
 tensor(0.3041, grad_fn=<L1LossBackward>))

In [31]:
#net = Net(params, 300)
net(['president'],['대통령']), net(['personal'],['개인']), net(['information'],['중국']), net(['china'],['중국'])

(tensor(0.3520, grad_fn=<L1LossBackward>),
 tensor(0.3822, grad_fn=<L1LossBackward>),
 tensor(0.3953, grad_fn=<L1LossBackward>),
 tensor(0.2889, grad_fn=<L1LossBackward>))

In [32]:
net(['president'],['대통령']), net(['korea'],['한국']), net(['information'],['미국']), net(['china'],['중국'])

(tensor(0.3520, grad_fn=<L1LossBackward>),
 tensor(0.3355, grad_fn=<L1LossBackward>),
 tensor(0.3939, grad_fn=<L1LossBackward>),
 tensor(0.2889, grad_fn=<L1LossBackward>))

In [24]:
bi_words = [(w,sorted(v, key=lambda x:x[1][0], reverse = True)[0][0]) for w,v in dictE.items()]
bi_words += [(sorted(v, key=lambda x:x[1][0], reverse = True)[0][0],w) for w,v in dictE.items()]

In [33]:
 bi_words[:10]

[('personal', '개인적'),
 ('computing', '컴퓨팅'),
 ('can', '수'),
 ('you', '당신'),
 ('mention', '언급'),
 ('few', '몇'),
 ('wireless', '무선'),
 ('optical', '광학'),
 ('mouse', '마우스'),
 ('another', '또')]

In [91]:
net = Net(params, 300)
beta = 0.1
src = [e[0] for e in bi_words]
tgt = [e[1] for e in bi_words]
M = net.procrustes(src,tgt)
U, S, V_t = scipy.linalg.svd(M, full_matrices=True)
W = U.dot(V_t)
#W = (1+beta)*W - beta*((W@W.transpose(0,1))@W)
#W = W@W.transpose(0,1)
net.w_map.weight.data = torch.tensor(W)
net(['president'],['대통령']), net(['personal'],['개인']), net(['china'],['중국']), net(['korea'],['한국'])

([tensor(0.5046, grad_fn=<DivBackward0>)],
 [tensor(0.2867, grad_fn=<DivBackward0>)],
 [tensor(0.5290, grad_fn=<DivBackward0>)],
 [tensor(0.4477, grad_fn=<DivBackward0>)])

In [93]:
W@W.transpose(0,1)

array([[ 1.0000000e+00,  2.3469329e-07,  8.8172965e-07, ...,
         6.2212348e-07,  5.5879354e-08, -5.8300793e-07],
       [-2.2538006e-07,  1.0000001e+00,  4.5681372e-07, ...,
        -2.2351742e-07, -1.6950071e-07,  3.1292439e-07],
       [-7.2410330e-07, -5.4575503e-07,  1.0000002e+00, ...,
        -7.9534948e-07,  1.0337681e-06,  9.0617687e-07],
       ...,
       [-4.9173832e-07,  2.9988587e-07,  6.2771142e-07, ...,
         1.0000002e+00,  2.9336661e-08,  2.9616058e-07],
       [-2.1792948e-07,  3.1478703e-07, -6.2957406e-07, ...,
        -5.2619725e-08,  1.0000001e+00, -1.8323772e-07],
       [ 4.4889748e-07, -2.3655593e-07, -6.6962093e-07, ...,
        -6.8917871e-08, -8.1490725e-08,  1.0000002e+00]], dtype=float32)

In [None]:
([tensor(0.5046, grad_fn=<DivBackward0>)],
 [tensor(0.2867, grad_fn=<DivBackward0>)],
 [tensor(0.5290, grad_fn=<DivBackward0>)],
 [tensor(0.4477, grad_fn=<DivBackward0>)])

In [50]:
aa = scipy.linalg.eigvals(W)
sum(aa)

(10.00001984834671+0j)

In [56]:
U.sum(), S.sum(), V_t.sum(), W.sum(), W.dot(W.transpose(1,0)).sum()

(-12.239889, 157415.62, -18.867588, 29.56411, 300.00006)

In [49]:
type(10.00001984834671+0j) 

complex

In [95]:
net(['china'],['대통령']), net(['personal'],['중국']), net(['information'],['정보']), net(['korea'],['한국'])

([tensor(0.0737, grad_fn=<DivBackward0>)],
 [tensor(0.1303, grad_fn=<DivBackward0>)],
 [tensor(0.4727, grad_fn=<DivBackward0>)],
 [tensor(0.4477, grad_fn=<DivBackward0>)])

In [98]:
net(['optical'],['광학'] ), net(['trump'],['오바마'] ), net(['obama'],['오바마'] ), net(['wireless'],['무선'] )

([tensor(0.3361, grad_fn=<DivBackward0>)],
 [tensor(0.2289, grad_fn=<DivBackward0>)],
 [tensor(0.3762, grad_fn=<DivBackward0>)],
 [tensor(0.4073, grad_fn=<DivBackward0>)])

In [38]:
U.shape

(300, 300)

In [28]:
from tqdm.notebook import tqdm

b_size = 32
beta = 0.01

net = Net(params, 300)
net.train()

uniform_init = 0.1

for p in net.parameters():
    p.data.uniform_(-uniform_init, uniform_init)
   
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)

batch_num = len(bi_words) // b_size +1
for ik in tqdm(range(100)):
            
    for i in range(batch_num):
        src,tgt = next(batch_iter(bi_words,b_size))
        optimizer.zero_grad()
        loss = net(src,tgt)       
        loss.backward()
        optimizer.step()
    #print(list(net.parameters())[0])
    #print('complete on epoch')  
    with torch.no_grad():
        W = net.w_map.weight.data
        U, S, V_t = scipy.linalg.svd(W, full_matrices=True)
        W = U@V_t
        #list(net.parameters())[0]
        W = (1+beta)*W - beta*((W@W.transpose(0,1))@W)
        net.w_map.weight.data = torch.tensor(W)
        #for p in net.parameters():
        #    p.data = W
            #p.data.requires_grad_()

            #p.data.requires_grad_()

    #print(list(net.parameters())[0])

HBox(children=(FloatProgress(value=0.0), HTML(value='')))




In [92]:
    for p in net.parameters():
        p.data = W
        p.data.requires_grad_()

In [57]:
list(net.parameters())[0], 
for p in net.parameters():
    print(len(p.data), len(p.data[0]))

300 300


In [77]:
W = list(net.parameters())[0].requires_grad_(False)
W = (1+0.01)*W -0.01*(W@W.transpose(0,1)@W)
for p in net.parameters():
    p.data = W
list(net.parameters())[0]


Parameter containing:
tensor([[ 0.0310, -0.0017,  0.0171,  ..., -0.0174, -0.0127, -0.0078],
        [ 0.0254, -0.0097, -0.0098,  ..., -0.0167,  0.0122, -0.0286],
        [ 0.0228, -0.0086,  0.0436,  ...,  0.0654, -0.0272,  0.0072],
        ...,
        [ 0.0241,  0.0039,  0.0124,  ...,  0.0379,  0.0423, -0.0138],
        [ 0.0009, -0.0169, -0.0552,  ...,  0.0178,  0.0275, -0.0162],
        [ 0.0173, -0.0218,  0.0376,  ..., -0.0193, -0.0095, -0.0664]])

In [89]:
len(list(net.parameters())) #[0].requires_grad_()

1

In [79]:
for p in net.parameters():
    p.data = W
list(net.parameters())[0]

Parameter containing:
tensor([[ 0.0310, -0.0017,  0.0171,  ..., -0.0174, -0.0127, -0.0078],
        [ 0.0254, -0.0097, -0.0098,  ..., -0.0167,  0.0122, -0.0286],
        [ 0.0228, -0.0086,  0.0436,  ...,  0.0654, -0.0272,  0.0072],
        ...,
        [ 0.0241,  0.0039,  0.0124,  ...,  0.0379,  0.0423, -0.0138],
        [ 0.0009, -0.0169, -0.0552,  ...,  0.0178,  0.0275, -0.0162],
        [ 0.0173, -0.0218,  0.0376,  ..., -0.0193, -0.0095, -0.0664]])

In [110]:
net(['president'],['대통령']), net(['information'],['정보']), net(['personal'],['다중']), net(['china'],['교환']),net(['korea'],['한국'])

(tensor(0.2289, grad_fn=<L1LossBackward>),
 tensor(0.2958, grad_fn=<L1LossBackward>),
 tensor(0.3492, grad_fn=<L1LossBackward>),
 tensor(0.3658, grad_fn=<L1LossBackward>),
 tensor(0.2329, grad_fn=<L1LossBackward>))

In [71]:
dictE['korea']

[['한국', [0.50732081031218, 199807, 1.7897433749406606]],
 ['북한', [0.36813596455527875, 208786, 1.5903648838103746]],
 ['국내', [0.24133897939656412, 163304, 3.6500185084996724]],
 ['일', [0.2236404031691881, 522615, 0.3249879573054788]],
 ['자유한국당', [0.21126844827059588, 155178, 4.7488237691253286]]]

In [79]:
net.parameters

AttributeError: 'function' object has no attribute 'keys'

In [None]:
def batch_iter(bi_words,b_size):
    batch_size = b_size
    batch_num = math.ceil(len(bi_words) / batch_size)
    index_array = list(range(len(bi_words)))

    if shuffle:
        np.random.shuffle(index_array)

    for i in range(batch_num):
        indices = index_array[i * batch_size: (i + 1) * batch_size]
        examples = [bi_words[idx] for idx in indices]
        src = [e[0] for e in examples]
        tgt = [e[1] for e in examples]

        yield src, tgt

In [None]:
sorted(bi_words,key=lambda x:x[0],reverse=True)[1200:1250]

In [24]:

dictX ={}
for w,v in dictE.items():
    dX = {}
    for wt in v:
        dX[wt[0]] = wt[1][0]/(1+np.abs(np.log(wt[1][2])))
    denome = sum(dX.values())
    dictX[w] = {k:v/denome for k,v in dX.items()}
    
dictX['true']

{'참': 0.7092478180651339, '사실': 0.1424496491409793, '숭배': 0.14830253279388678}

In [38]:
dictX['china']

{'중국': 0.8330298871257, '미국': 0.10416938597051914, '무역전쟁': 0.06280072690378084}

In [22]:
import numpy as np
np.abs(np.log(0.86)), np.abs(np.log(1.33))

(0.15082288973458366, 0.28517894223366247)

In [27]:
dictE['former']

[['전', [0.6036746244441473, 190080, 0.4297426794136009]],
 ['검찰', [0.13637146344892434, 90863, 1.6938333827453305]],
 ['혐의', [0.13075522730706107, 83887, 2.1354937579427373]],
 ['이전', [0.12848188311517145, 73686, 3.4515193620491753]],
 ['대통령', [0.10423307560107185, 160728, 0.5515034509387519]]]

In [18]:
print(bi_words[:100])

[('많', 'many'), ('개인적', 'personal'), ('컴퓨팅', 'computing'), ('수', 'can'), ('우리', 'our'), ('어떻게', 'how'), ('가능', 'possible'), ('당신', 'you'), ('나', 'i'), ('그', 'he'), ('일', 'th'), ('올해', 'year'), ('때문', 'because'), ('언급', 'mention'), ('몇', 'few'), ('소수', 'minority'), ('전', 'former'), ('무선', 'wireless'), ('광학', 'optical'), ('쥐', 'rats'), ('마우스', 'mouse'), ('다르', 'other'), ('또', 'another'), ('모든', 'all'), ('모두', 'all'), ('지만', 'but'), ('않', 'not'), ('아니', 'not'), ('또한', 'also'), ('필요', 'need'), ('책상', 'desk'), ('데스크', 'desk'), ('센서', 'sensor'), ('감지', 'detect'), ('통제', 'control'), ('제어', 'control'), ('운동', 'movement'), ('독립운동', 'independence'), ('손목', 'wrist'), ('팔', 'arm'), ('통', 'through'), ('공기', 'air'), ('항공', 'aviation'), ('공기청정기', 'purifier'), ('오염', 'pollution'), ('인공지능', 'artificial'), ('정보', 'information'), ('지성', 'intelligent'), ('국정원', 'nis'), ('관계자', 'official'), ('공식', 'official'), ('관리', 'management'), ('공개', 'unveil'), ('음모', 'conspiracy'), ('선박', 'ship'), ('동남아', 'southeast'

In [12]:
list(dictE)[:10],dictE['personal']

(['personal',
  'computing',
  'can',
  'you',
  'mention',
  'few',
  'wireless',
  'optical',
  'mouse',
  'another'],
 [['개인적', [0.5028736421047497, 36103, 2.2180229967020235]],
  ['개인정보', [0.33360332737029397, 28843, 6.2854256125284165]],
  ['개인', [0.32997207139147905, 42227, 1.434815199215822]]])

In [7]:
net =Net(params, 300)
print(net)

Net(
  (w_map): Linear(in_features=300, out_features=300, bias=False)
)


In [71]:
aa.max(-1)

(tensor(0.6918), tensor(335))

In [74]:
wD.id2word[aa.max(-1)[1].item()+3]

'men'

In [67]:
wD['queen']

2850

In [68]:
cos_sim(emb[wD['queen']],mapped)

tensor(0.4075)

In [66]:
wD.id2word[553]

'king'

wD.id2word[88]

In [59]:
cos_sim(emb[10],emb[10])

tensor(1.0000)

In [24]:
aa = params['state_dict']['model_embeddings.vocabs.weight'][5537]
bb = params['state_dict']['model_embeddings.vocabs.weight'][129]
sum(aa*bb), sum(aa*aa), sum(bb*bb)

(tensor(10.1422), tensor(39.2305), tensor(35.5234))

In [30]:
cc = params['state_dict']['en_gate.weight']

torch.Tensor

In [32]:
type(cc.t)

builtin_function_or_method

In [34]:
dd = torch.matmul(cc,torch.transpose(cc,0,1))
dd.size()

torch.Size([300, 300])

In [2]:
params['state_dict'].keys()

odict_keys(['model_embeddings.vocabs.weight', 'ek_encoder.weight_ih_l0', 'ek_encoder.weight_hh_l0', 'ek_encoder.bias_ih_l0', 'ek_encoder.bias_hh_l0', 'ek_encoder.weight_ih_l0_reverse', 'ek_encoder.weight_hh_l0_reverse', 'ek_encoder.bias_ih_l0_reverse', 'ek_encoder.bias_hh_l0_reverse', 'ek_encoder.weight_ih_l1', 'ek_encoder.weight_hh_l1', 'ek_encoder.bias_ih_l1', 'ek_encoder.bias_hh_l1', 'ek_encoder.weight_ih_l1_reverse', 'ek_encoder.weight_hh_l1_reverse', 'ek_encoder.bias_ih_l1_reverse', 'ek_encoder.bias_hh_l1_reverse', 'ek_decoder.weight_ih', 'ek_decoder.weight_hh', 'ek_decoder.bias_ih', 'ek_decoder.bias_hh', 'ek_h_projection.weight', 'ek_c_projection.weight', 'ek_att_projection.weight', 'ek_combined_output_projection.weight', 'ek_target_vocab_projection.weight', 'ke_encoder.weight_ih_l0', 'ke_encoder.weight_hh_l0', 'ke_encoder.bias_ih_l0', 'ke_encoder.bias_hh_l0', 'ke_encoder.weight_ih_l0_reverse', 'ke_encoder.weight_hh_l0_reverse', 'ke_encoder.bias_ih_l0_reverse', 'ke_encoder.bias_h

In [4]:
cd nmt_bi_combined_0702

/home/john/Notebook/Project2/Translation/nmt_bi_combined_0702


In [None]:
!tar -xzvf nmt_bi_combined_0702_lr0002.tar.gz