In [34]:
import argparse
import pickle as pkl

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from data_iter import DisDataIter, GenDataIter
import numpy as np
import gensim

import generator_kmer
import discriminator_kmer
import target_lstm_kmer
from generator_kmer import Generator
from discriminator_kmer import Discriminator
from target_lstm_kmer import TargetLSTM
from loss import PGLoss

import preprocessdna
import load_data
import loss
import os

# Arguemnts
parser = argparse.ArgumentParser(description='SeqGAN')
parser.add_argument('--hpc', action='store_true', default=False,
                    help='set to hpc mode')
parser.add_argument('--data_path', type=str, default='/scratch/zc807/seq_gan/', metavar='PATH',
                    help='data path to save files (default: /scratch/zc807/seq_gan/)')
parser.add_argument('--rounds', type=int, default=2, metavar='N',
                    help='rounds of adversarial training (default: 150)')
parser.add_argument('--g_pretrain_steps', type=int, default=2, metavar='N',
                    help='steps of pre-training of generators (default: 120)')
parser.add_argument('--d_pretrain_steps', type=int, default=2, metavar='N',
                    help='steps of pre-training of discriminators (default: 50)')
parser.add_argument('--g_steps', type=int, default=1, metavar='N',
                    help='steps of generator updates in one round of adverarial training (default: 1)')
parser.add_argument('--d_steps', type=int, default=1, metavar='N',
                    help='steps of discriminator updates in one round of adverarial training (default: 3)')
parser.add_argument('--gk_epochs', type=int, default=1, metavar='N',
                    help='epochs of generator updates in one step of generate update (default: 1)')
parser.add_argument('--dk_epochs', type=int, default=1, metavar='N',
                    help='epochs of discriminator updates in one step of discriminator update (default: 3)')
parser.add_argument('--update_rate', type=float, default=0.8, metavar='UR',
                    help='update rate of roll-out model (default: 0.8)')
parser.add_argument('--n_rollout', type=int, default=1, metavar='N',
                    help='number of roll-out (default: 16)')
parser.add_argument('--vocab_size', type=int, default=10, metavar='N',
                    help='vocabulary size (default: 10)')
parser.add_argument('--batch_size', type=int, default=2, metavar='N',
                    help='batch size (default: 64)')
parser.add_argument('--n_samples', type=int, default=2, metavar='N',
                    help='number of samples gerenated per time (default: 6400)')
parser.add_argument('--gen_lr', type=float, default=1e-3, metavar='LR',
                    help='learning rate of generator optimizer (default: 1e-3)')
parser.add_argument('--dis_lr', type=float, default=1e-3, metavar='LR',
                    help='learning rate of discriminator optimizer (default: 1e-3)')
parser.add_argument('--no_cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')

# Discriminator Parameters
Tensor =  torch.LongTensor
d_num_class = 2
d_embed_dim = 64
d_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
d_num_filters = [100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160]
d_dropout_prob = 0.2

In [42]:
path_prefix = './'
# args = parser.parse_args()
# args.cuda = not args.no_cuda and torch.cuda.is_available()
train_seq = load_data.get_seqs("train.fa")
test_seq = load_data.get_seqs("test.fa")
#     # print(seq_list)
train_kmer_list = load_data.getKmerList(train_seq,3)
test_kmer_list = load_data.getKmerList(test_seq,3)
ref_model = load_data.train_word2vec(test_kmer_list)
# print(model)
#     # print("saving model ...")
model = load_data.train_word2vec(train_kmer_list + test_kmer_list)
model.save(os.path.join(path_prefix, 'w2v_all.model'))
ref_model.save(os.path.join(path_prefix, 'ref.model')) #将模型保存这一步可以使得后续的训练更方便，是一个很好的习惯
g_embed_dim = 64
g_hidden_dim = 32
#     # # 150 - 3 + 1
train_preprocess = preprocessdna.Preprocess(train_kmer_list, 148, "w2v_all.model")
real_embedding = train_preprocess.make_embedding(load=True)
real_data = train_preprocess.sentence_word2idx()
# real_embedding = Tensor(real_embedding)
# train_preprocess = preprocessdna.Preprocess(train_kmer_list, 67, "ref.model")
# ref_embedding = train_preprocess.make_embedding(load=True)
print(real_data.size())
#     # # def __init__(self,  pre_weight,vocab_size, embedding_dim, hidden_dim, use_cuda):
target_lstm = target_lstm_kmer.TargetLSTM(real_embedding, g_embed_dim, g_hidden_dim, False)
# print(target_lstm)
generator = Generator(real_embedding, g_embed_dim, g_hidden_dim, False)
discriminator = Discriminator(real_embedding,d_num_class, d_embed_dim, d_filter_sizes, d_num_filters, d_dropout_prob)
# # print(generator)
# # print(discriminator)
nll_loss = nn.NLLLoss()
pg_loss = PGLoss()

gen_optimizer = optim.Adam(params=generator.parameters(), lr=1e-3)
dis_optimizer = optim.SGD(params=discriminator.parameters(), lr=1e-3)

for epoch in range(5):
    for i, real_word in enumerate(real_embedding):
        # z = Tensor(np.random.normal(0, 1, (real_embedding.shape[0], real_embedding.shape[1])))
        z = torch.randn(real_embedding.shape[0],real_embedding.shape[1])
        # print(z.dtype)
        gen_word = generator(z)
#         print(gen_word)
        # discriminator(gen_word)
        # gen_optimizer.zero_grad()
        # g_loss = nll_loss(discriminator(gen_word),torch.ones(real_embedding.size(0),1))

        # g_loss.backward()
        # gen_optimizer.step()
        # dis_optimizer.zero_grad()

        # real_loss = nll_loss(discriminator(real_word), torch.ones(real_embedding.size(0),1))
        # fake_loss = nll_loss(discriminator(gen_word.detach()), gen_word)
        # d_loss = (real_loss + fake_loss) / 2

        # d_loss.backward()
        # dis_optimizer.step()






loading DNA to vec model ...
get words #64
Total words: 66
torch.Size([13, 148])


RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.FloatTensor instead (while checking arguments for embedding)