In [32]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from sklearn import metrics 
from rouge import Rouge 
from tqdm import tqdm

from parser import *
from utils import *
from models import Decoder
from datasets import HistDataset
from run import *

In [33]:
opt = get_parser()
# loader = Loader(opt.dataset, opt.gpu)
loader = Loader("example", False)

ArgumentParser(prog='ipykernel_launcher.py', usage=None, description=None, formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)
Namespace(alpha=0.2, batch_size=32, beam_size=2, ctx_attn='sum_co', dataset='example', dropout=0.6, epochs=2, gpu=False, hidden_size=128, integration='', lamb=0.05, lr=0.003, n_ctxs=2, n_features=128, neg=2, nheads=4, optim='Adam', resume=False, task='multi', weight_decay=5e-06)
data_load_over!


In [34]:
print("train_data:")
print(loader.train_data.head())

# 查看 test_data 的前几行数据
print("test_data:")
print(loader.test_data.head())

train_data:
   paper_id1  paper_id2                                      clean_content  \
0          0         55  obviously they satisfy the constraints fullfco...   
1          0         53  2table 2 bleu 4 scores with one reference tran...   
2          0         24  note that the re sulting conditional distribut...   
3          0         89  the learning mod els are support vector machin...   
4          0         56  we note here other ex amples described their a...   

   num_of_words  
0            97  
1           103  
2           100  
3           128  
4            46  
test_data:
     paper_id1  paper_id2                                      clean_content  \
821         82         19  once the feature values are computed our goal ...   
771         77         69  next we extracted phrase level translation pai...   
954         95         92  the stanford parser 13 and crf chunker 14 have...   
631         63         50  g k best forest oracles orhopfear derivations ...   


In [35]:
print(loader.max_len)

191


In [36]:
word = loader.build_word("once")
print(word)

None


In [37]:
loader.idx2word[100]

'features'

In [38]:
G_out= loader.G_out
# G_out.nodes()：返回图中所有节点的列表。
# G_out.edges()：返回图中所有边的列表。
# G_out.adj[node]：返回与节点 node 相邻的所有节点的字典，字典的键是相邻节点的标识符，值是边的属性字典。
# G_out.degree(node)：返回节点 node 的度数（即与该节点相邻的边的数量）。
# G_out.number_of_nodes()：返回图中节点的数量。
# G_out.number_of_edges()：返回图中边的数量。

In [39]:
print(len(G_out.nodes()))
print(len(G_out.edges()))
# for u, v, d in G_out.edges(data=True):
#     print(u, v, d['context'])
# 0 55 4 1 inference based learningmany learning paradigms can be defined asinference based learning these include the per ceptronand its large margin vari ants in these settings a models parameters areiteratively updated based on the argmax calculationfor a single or set of training instances under thecurrent parameter settings
# 0 53 2table 2 bleu 4 scores with one reference translating into english the numbers in parentheses are times inhours to run parameter optimization end to end refers to moses linear model features extended refers to non linear and hidden state features polynomial features future cost dl refers to distortion limit search is the set ofparameters controlling search quality parameters controlling beam size histogram pruning and threshold pruningour baseline system is trained for each languagepair by running minimum error rate trainingon 1000 sentences each iteration of mertutilizes 19 random starting points plus the points ofconvergence at all previous iterations of mert anda uniform weight vector
# 0 24 note that the re sulting conditional distribution will be drawn solelyfrom one input distribution when the conditioningcontext is unseen in the remaining distributions thismay lead to an over reliance on unreliable distribu tions which can be ameliorated by smoothing as an alternative to linear interpolation we alsoemploy a weighted product for phrase table combi nation p jpjj 3this has the same form used for log linear trainingof smt decoderswhich allows us totreat each distribution as a feature and learn the mix ing weights automatically note that we must indi vidually smooth the component distributions in to stop zeros from propagating

101
962


# 构建模型对象Decoder

In [40]:
model = Decoder(n_words=loader.n_words,  # 9860
                    n_nodes=loader.n_nodes,  # 101
                    max_len=loader.max_len,  # 191
                    opt = opt)
if opt.gpu: model= model.cuda()

In [41]:
# 模型的参数通常保存在一个叫做“状态字典”（state dictionary）的对象中。state_dict() 方法可以返回模型的状态字典，其中包含了所有模型的参数及其对应的值
print(model.node_embedding.state_dict()) # weight 是 node_embedding 的参数名，tensor 对象则是参数的值

OrderedDict([('weight', tensor([[-0.0332,  0.1050,  1.0852,  ..., -0.2843,  1.7697, -0.0296],
        [-0.0047, -0.7061, -0.9779,  ..., -1.2059, -1.5961,  1.4125],
        [ 1.5672, -0.3576, -0.1071,  ..., -0.1798,  1.1636, -1.1166],
        ...,
        [-0.8575,  1.1274,  1.0805,  ..., -0.9891,  0.0766,  2.3136],
        [-0.3479,  1.2266, -0.3459,  ...,  2.9780,  0.9717, -0.5160],
        [ 1.8138,  1.6081,  0.5013,  ..., -0.2186, -0.2150, -0.0863]]))])


In [42]:
node_embedding_state_dict = model.node_embedding.state_dict()
node_weight_shape = node_embedding_state_dict['weight'].shape
print(node_weight_shape)
word_embedding_state_dict = model.word_embedding.state_dict()
word_weight_shape = word_embedding_state_dict['weight'].shape
print(word_weight_shape)

torch.Size([101, 128])
torch.Size([9860, 128])


In [43]:
if opt.optim == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr=opt.lr)
# Adagrad 优化器适合处理具有稀疏梯度和欠约束的目标函数
elif opt.optim == 'Adagrad':
    optimizer = optim.Adagrad(model.parameters(), lr=opt.lr)
# SGD 优化器则更适合处理大规模数据集和稳定的目标函数
else:
    optimizer = optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)

# 划分数据集

In [44]:
print('Training...')
train_dataset = HistDataset(loader, opt) # 数据集相关 负样本 划分等

Training...


In [45]:
print(train_dataset.n_ctxs)

2


# 加载数据

In [46]:
train_dl = DataLoader(train_dataset,
                          opt.batch_size,
                          pin_memory=True,
                          shuffle=True,
                          collate_fn=loader.collate_fun,
                          num_workers=1)

In [47]:
train(model, train_dl, opt.epochs, optimizer)


[Epoch 1]:   0%|                                         | 0/32 [00:00<?, ?it/s]



NameError: Caught NameError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/root/miniconda3/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "/root/miniconda3/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    return self.collate_fn(data)
  File "/root/autodl-tmp/AutoCite-main/utils.py", line 158, in collate_fun
    input_lengths = input_lengths.cuda()
NameError: name 'h_idx' is not defined


In [1]:
import pickle
path = 'data/{}.pkl'.format("example")
train_data, test_data = pickle.load(open(path, 'rb'))
train_data.head()

ModuleNotFoundError: No module named 'pandas.core.indexes.numeric'