In [1]:
from functools import partial
from tqdm import trange
import torch
import torch.nn.functional as F
import numpy as np
import pytorch_pretrained_bert
from pytorch_pretrained_bert import TransfoXLTokenizer, TransfoXLModel, TransfoXLLMHeadModel
for mod in (np, torch, pytorch_pretrained_bert):
    print(f'{mod.__name__}: {mod.__version__}')

numpy: 1.16.2
torch: 1.1.0
pytorch_pretrained_bert: 0.6.2


# Build model Transformer XL

In [2]:
seed = 0
np.random.seed(seed)
torch.random.manual_seed(seed)
torch.cuda.manual_seed(seed)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_name_or_path = 'transfo-xl-wt103'
tokenizer = TransfoXLTokenizer.from_pretrained(model_name_or_path)
model = TransfoXLLMHeadModel.from_pretrained(model_name_or_path)
model.to(device)
model.eval()

TransfoXLLMHeadModel(
  (transformer): TransfoXLModel(
    (word_emb): AdaptiveEmbedding(
      (emb_layers): ModuleList(
        (0): Embedding(20000, 1024)
        (1): Embedding(20000, 256)
        (2): Embedding(160000, 64)
        (3): Embedding(67735, 16)
      )
      (emb_projs): ParameterList(
          (0): Parameter containing: [torch.FloatTensor of size 1024x1024]
          (1): Parameter containing: [torch.FloatTensor of size 1024x256]
          (2): Parameter containing: [torch.FloatTensor of size 1024x64]
          (3): Parameter containing: [torch.FloatTensor of size 1024x16]
      )
    )
    (drop): Dropout(p=0.1)
    (layers): ModuleList(
      (0): RelPartialLearnableDecoderLayer(
        (dec_attn): RelPartialLearnableMultiHeadAttn(
          (qkv_net): Linear(in_features=1024, out_features=3072, bias=False)
          (drop): Dropout(p=0.1)
          (dropatt): Dropout(p=0.0)
          (o_net): Linear(in_features=1024, out_features=1024, bias=False)
          (laye

Dummy prediction, to check vocab size:

In [3]:
line = "Dummy"
line_tokenized = tokenizer.tokenize(line)
line_indexed = tokenizer.convert_tokens_to_ids(line_tokenized)
tokens_tensor = torch.tensor([line_indexed])
predictions, _ = model(tokens_tensor)
vocab_size = predictions.shape[-1]
assert vocab_size == 267735  # WikiText-103 vocab size

# Minimal example

## Online text generation

In [92]:
line = "Cars were invented in"
max_predictions = 16
top_k = 2

line_tokenized = tokenizer.tokenize(line)
line_indexed = tokenizer.convert_tokens_to_ids(line_tokenized)
tokens_tensor = torch.tensor([line_indexed])
tokens_tensor = tokens_tensor.to(device)
mems = None

for i in range(max_predictions):
    predictions, mems = model(tokens_tensor, mems=mems)
    context_size = tokens_tensor.shape[1]
    assert predictions.shape == (1, context_size, vocab_size)
    topk = torch.topk(predictions[0, -1, :], 10)
    predicted_index = topk.indices[top_k-1].item()
    predicted_index = torch.tensor([[predicted_index]]).to(device)
    tokens_tensor = torch.cat((tokens_tensor, predicted_index), dim=1)
    
    predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
    print(predicted_token, end=' ', flush=True)
    print('\n', tokenizer.convert_ids_to_tokens(topk.indices), '\n')

Britain 
 ['the', 'Britain', 'late', '1978', '1974', 'England', '1979', '1982', '2000', '1972'] 

and 
 ['.', 'and', ',', 'by', 'in', ':', ';', 'as', 'for', 'during'] 

America 
 ['the', 'America', 'France', 'were', 'shipped', 'Canada', 'Italy', 'Europe', 'cars', 'in'] 

, 
 ['.', ',', 'in', 'and', ';', 'by', '(', ':', 'as', 'during'] 

but 
 ['and', 'but', 'including', 'the', 'with', 'although', 'which', 'as', 'such', 'in'] 

the 
 ['were', 'the', 'cars', 'are', 'in', 'not', 'they', 'have', 'their', 'only'] 

first 
 ['British', 'first', 'most', 'European', 'UK', '"', 'American', '<unk>', 'design', 'US'] 

two 
 ['cars', 'two', 'British', 'ones', 'American', 'car', 'examples', 'horses', 'European', '<unk>'] 

cars 
 ['were', 'cars', 'are', 'of', 'in', 'types', '@-@', ',', 'vehicles', 'prototypes'] 

had 
 ['were', 'had', 'are', 'did', 'came', ',', 'in', 'have', 'could', 'made'] 

to 
 ['been', 'to', 'a', 'not', 'the', 'already', 'no', 'arrived', 'yet', 'an'] 

have 
 ['be', 'have', 'c

> **NOTE**: this text is generated choosing at each step the top_k most probable token.
> This is **online text generation**, since at each step, the model only knows the past.

## Off-line text generation

In [81]:
def print_text(input_tokens, predicted_tensor, top_k=5):
    print(f'\n[top {top_k} token] PROMPT:', line)
    for i in range(len(line_indexed) - 1, context_size):
        topk = torch.topk(predicted_tensor[0, i, :], top_k)
        top_k_predictions = tokenizer.convert_ids_to_tokens(topk.indices)
        print(top_k_predictions[top_k - 1], end=' ')
    print()
        
input_text = tokenizer.convert_ids_to_tokens(tokens_tensor.tolist()[0])
for i in range(1, 5):
    print_text(input_text, predictions, top_k=i)      


[top 1 token] PROMPT: Cars were invented in
the . America . and the first two were were to be been a car <unk> 

[top 2 token] PROMPT: Cars were invented in
Britain and the , but a only car cars had been have a the " Turbo 

[top 3 token] PROMPT: Cars were invented in
America , American and so it second one had did a meet the built British car 

[top 4 token] PROMPT: Cars were invented in
Australia in Canada in as not last vehicle horses could not not an an Turbo one 


> **NOTE**: this text is generated choosing at each step the top_k most probable token.
> This is **offline text generation** using the final `prediction` tensor 
> that has information about the whole sequence (so for each word the prediction has been influenced by the future).
>
> The text seems worst, probably because the model is trained to optimize the online prediction
> like in the previous example

In [85]:
def print_input_output(input_tokens, predicted_tensor, top_k=10):
    print(f'  MODEL INPUTS    MODEL OUTPUT (top {top_k} tokens)')
    print(f'  ------------    -----------------------------')
    for i in range(context_size):
        topk = torch.topk(predicted_tensor[0, i, :], top_k)
        p = '* ' if i < len(line_indexed) else '  '
        print(f'{p}{input_tokens[i]:14s}:', end=' ')
        top_k_predictions = tokenizer.convert_ids_to_tokens(topk.indices)
        print(' '.join(top_k_predictions))
        #print('', np.round(topk.values.tolist(), 2))
        
input_text = tokenizer.convert_ids_to_tokens(tokens_tensor.tolist()[0])
print_input_output(input_text, predictions)      

  MODEL INPUTS    MODEL OUTPUT (top 10 tokens)
  ------------    -----------------------------
* Cars          : . , in and built of to before design (
* were          : built the a used made produced in completed first also
* invented      : in . , and before at to on from into
* in            : the Britain America Australia Canada England France Europe a both
  Britain       : . and , in as ; before ( but to
  and           : America the American Canada a France Europe Australia North Japan
  America       : . , and in before ; as ( but to
  ,             : and but so as the although a before which with
  but           : the a it not only were in did there had
  the           : first only second last third final two fourth largest most
  first         : two car one vehicle three was of four product prototype
  two           : were cars had horses vehicles of ships in , did
  cars          : were had did could , would in needed are came
  had           : to been a not problems already

> **NOTE**: lines starting with `*` are inputs in the initial prompt.

> **NOTE 2**: the top tokens are imprecise, because the prediction was done online,
> while here we use the final `prediction` tensor to score the tokens (offline prediction)

## Online text generation with sampling

In [156]:
seed = 0
prompt = "Cars were invented in"
max_predictions = 25
top_k = 40

np.random.seed(seed)
torch.random.manual_seed(seed)
line_tokenized = tokenizer.tokenize(prompt)
line_indexed = tokenizer.convert_tokens_to_ids(line_tokenized)
tokens_tensor = torch.tensor([line_indexed])
tokens_tensor = tokens_tensor.to(device)
mems = None

print(f'PROMPT: {prompt}')
print('MODEL:  ', end='')
for i in range(max_predictions):
    predictions, mems = model(tokens_tensor, mems=mems)
    context_size = tokens_tensor.shape[1]
    assert predictions.shape == (1, context_size, vocab_size)
    
    # sample next token from the most probable top-k
    last_prediction = predictions[0, -1, :]
    topk = torch.topk(last_prediction, top_k)
    log_probs = F.softmax(topk.values, dim=-1)  # softmax among the top-k
    rand_idx_in_topk = torch.multinomial(log_probs, num_samples=1)
    predicted_index = topk.indices[rand_idx_in_topk]
    
    # test
    last_pred_trunk = top_k_logits(last_prediction.reshape(1, -1), top_k)
    sorted_valid_values = last_pred_trunk[last_pred_trunk > -1e10].sort(descending=True).values
    assert all(sorted_valid_values == topk.values)
    
    # update model state
    predicted_index = torch.tensor([[predicted_index]]).to(device)
    tokens_tensor = torch.cat((tokens_tensor, predicted_index), dim=1)
    
    # print current token
    predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
    print(predicted_token, end=' ', flush=True)

PROMPT: Cars were invented in
MODEL:  1967 by Dr. Carl R. B. <unk> . In 1968 , the first <unk> were developed by Dr. George P. <unk> and Dr. Charles A. 

In [160]:
def gen_text_sample(
        prompt = "Cars were invented in",
        seed = 0,
        length = 5,
        top_k = 40,
        top_p = None,
    ):
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    line_tokenized = tokenizer.tokenize(prompt)
    line_indexed = tokenizer.convert_tokens_to_ids(line_tokenized)
    tokens_tensor = torch.tensor([line_indexed])
    tokens_tensor = tokens_tensor.to(device)
    if top_p is not None:
        assert 0 < top_p <= 1, '`top_p` must be in (0..1]'
        top_k = round(tokens_tensor.shape[1] * top_p)

    print(f'PROMPT: {prompt}')
    print('MODEL:  ', end='')
    mems = None
    for i in range(length):
        predictions, mems = model(tokens_tensor, mems=mems)
        context_size = tokens_tensor.shape[1]
        assert predictions.shape == (1, context_size, vocab_size)

        # sample next token from the most probable top-k
        last_prediction = predictions[0, -1, :]
        topk = torch.topk(last_prediction, top_k)
        log_probs = F.softmax(topk.values, dim=-1)  # softmax among the top-k
        rand_idx_in_topk = torch.multinomial(log_probs, num_samples=1)
        predicted_index = topk.indices[rand_idx_in_topk]

        # test
        last_pred_trunk = top_k_logits(last_prediction.reshape(1, -1), top_k)
        sorted_valid_values = last_pred_trunk[last_pred_trunk > -1e10].sort(descending=True).values
        assert all(sorted_valid_values == topk.values)

        # update model state
        predicted_index = torch.tensor([[predicted_index]]).to(device)
        tokens_tensor = torch.cat((tokens_tensor, predicted_index), dim=1)

        # print current token
        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
        print(predicted_token, end=' ', flush=True)

In [161]:
prompt = "Cars were invented in"
gen_text_sample(top_k=40, length=10)

PROMPT: Cars were invented in
MODEL:  1967 by Dr. Carl R. B. <unk> . In 1968 

In [162]:
prompt = "Cars were invented in"
gen_text_sample(top_p=0.5, length=10)

PROMPT: Cars were invented in
MODEL:  Britain and the US . The first cars were produced 

# Test generation

In [None]:
prompt = 'What do you know about Machine Learning and Natural Language Processing?'
length = 60
for seed in range(5):
    gen_text_sample(top_p=0.5, length=length, seed=seed)
    print()

PROMPT: Cars were invented in
MODEL:  Britain and 

In [None]:
prompt = 'What do you know about Machine Learning and Natural Language Processing?'
length = 60
for seed in range(5):
    gen_text_sample(top_k=40, length=length, seed=seed)
    print()

# Model exploration

The file `textgen.py` provides an API for text generation for both *Transformer XL* and other models (*GPT2*, etc..).

It requires:

- mode signature: `model(prev, past=tensor)` 
- function `decoder(ids)` returning tokens
- from `generate_text_<model>` function, use partial to assign model specific args and create a
  function `gen_text` with will have the same signature for all models

In [36]:
%run -i textgen.py

In [37]:
def model_comp(prev, past):
    return model(prev, mems=past)

In [38]:
decoder = partial(decoder_transformer_xl, tokenizer=tokenizer)
#decoder?

In [39]:
gen_text = partial(generate_text_transformer_xl, model_comp, tokenizer, decoder)

In [40]:
gen_text(line, 
         length=10, sample=False, top_k=2)

  0%|          | 0/10 [00:00<?, ?it/s]

context[<class 'list'>]: [17512, 28, 6999, 7]
context.shape = torch.Size([1, 4])
logits.shape = torch.Size([1, 4, 267735])
last_logits.shape = torch.Size([1, 267735])
END[topk] logits.shape = torch.Size([1, 4, 267735])


 10%|█         | 1/10 [00:03<00:32,  3.56s/it]

context.shape = torch.Size([1, 5])
logits.shape = torch.Size([1, 5, 267735])
last_logits.shape = torch.Size([1, 267735])
END[topk] logits.shape = torch.Size([1, 5, 267735])


 20%|██        | 2/10 [00:07<00:28,  3.60s/it]

context.shape = torch.Size([1, 6])
logits.shape = torch.Size([1, 6, 267735])
last_logits.shape = torch.Size([1, 267735])
END[topk] logits.shape = torch.Size([1, 6, 267735])


 30%|███       | 3/10 [00:10<00:24,  3.55s/it]

context.shape = torch.Size([1, 7])
logits.shape = torch.Size([1, 7, 267735])
last_logits.shape = torch.Size([1, 267735])
END[topk] logits.shape = torch.Size([1, 7, 267735])


 40%|████      | 4/10 [00:12<00:19,  3.17s/it]

context.shape = torch.Size([1, 8])
logits.shape = torch.Size([1, 8, 267735])
last_logits.shape = torch.Size([1, 267735])
END[topk] logits.shape = torch.Size([1, 8, 267735])


 50%|█████     | 5/10 [00:15<00:14,  2.89s/it]

context.shape = torch.Size([1, 9])
logits.shape = torch.Size([1, 9, 267735])
last_logits.shape = torch.Size([1, 267735])
END[topk] logits.shape = torch.Size([1, 9, 267735])


 60%|██████    | 6/10 [00:17<00:11,  2.85s/it]

context.shape = torch.Size([1, 10])
logits.shape = torch.Size([1, 10, 267735])
last_logits.shape = torch.Size([1, 267735])
END[topk] logits.shape = torch.Size([1, 10, 267735])


 70%|███████   | 7/10 [00:20<00:08,  2.68s/it]

context.shape = torch.Size([1, 11])
logits.shape = torch.Size([1, 11, 267735])
last_logits.shape = torch.Size([1, 267735])
END[topk] logits.shape = torch.Size([1, 11, 267735])


 80%|████████  | 8/10 [00:22<00:05,  2.63s/it]

context.shape = torch.Size([1, 12])
logits.shape = torch.Size([1, 12, 267735])
last_logits.shape = torch.Size([1, 267735])
END[topk] logits.shape = torch.Size([1, 12, 267735])


 90%|█████████ | 9/10 [00:26<00:02,  2.82s/it]

context.shape = torch.Size([1, 13])
logits.shape = torch.Size([1, 13, 267735])
last_logits.shape = torch.Size([1, 267735])
END[topk] logits.shape = torch.Size([1, 13, 267735])


100%|██████████| 10/10 [00:29<00:00,  3.03s/it]

PROMPT: Cars were invented in
Britain and America , but the first two cars had





In [4]:
# logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
#                     datefmt = '%m/%d/%Y %H:%M:%S',
#                     level = logging.INFO)
# logger = logging.getLogger(__name__)

In [None]:
def top_k_logits(logits, k):
    """
    Masks everything but the k top entries as -infinity (1e10).
    Used to mask logits such that e^-infinity -> 0 won't contribute to the
    sum of the denominator.
    """
    if k == 0:
        return logits
    else:
        values = torch.topk(logits, k)[0]
        batch_mins = values[:, -1].view(-1, 1).expand_as(logits)
        return torch.where(logits < batch_mins, torch.ones_like(logits) * -1e10, logits)

    
def top_p_logits(logits, p):
    """
    Masks everything but the top-p entries as -infinity (1e10).
    
    Differently from `top_k_logits`, here we we don't take a fixed number
    k of elements in `logits`, but a fraction `p`
    of elements. These are the elements higher that the `p` percentile.
    
    Used to mask logits such that e^-infinity -> 0 won't contribute to the
    sum of the denominator.
    """
    if p == 1:
        return logits
    else:
        k = round(logits.shape[1] * p)
        print(f'top_p = {top_p:.1g}, k = {k}', flush=True)
        return top_k_logits(logits, k)

    
def sample_sequence(model, length, context, batch_size=None, 
                    temperature=1, top_k=0, top_p=None, device='cuda', sample=True):
    context = torch.tensor(context, device=device, dtype=torch.long).unsqueeze(0).repeat(batch_size, 1)
    prev = context
    output = context
    past = None
    with torch.no_grad():
        for i in trange(length):
            logits, past = model(prev, past=past)
            logits = logits[:, -1, :] / temperature
            if top_p is None:
                logits = top_k_logits(logits, k=top_k)
            else:
                logits = top_p_logits(logits, p=top_p)
            log_probs = F.softmax(logits, dim=-1)
            if sample:
                prev = torch.multinomial(log_probs, num_samples=1)
            else:
                _, prev = torch.topk(logits, k=1, dim=-1)
            output = torch.cat((output, prev), dim=1)
    return output

In [34]:
def encode_transformer_xl(text, encoder, device):
    text_tokenized = encoder.tokenize(text)
    text_indexed = encoder.convert_tokens_to_ids(text_tokenized)
    text_indexed_tensor = torch.tensor([text_indexed])
    text_indexed_tensor = text_indexed_tensor.to(device)
    return text_indexed_tensor

def run_model(
        prompt = None,
        batch_size = 1,
        nsamples = 1,    
        length = -1,
        temperature = 1,
        top_k = 0,
        top_p=None,
        sample = True,
        seed = 0,
        EOT = '<|endoftext|>',
    ):
    # Arguments checks
    assert nsamples % batch_size == 0
    assert prompt is not None and len(prompt) > 0
    
#     if length == -1:
#         length = model.config.n_ctx // 2
#     elif length > model.config.n_ctx:
#         raise ValueError("Can't get samples longer than window size: %s" % model.config.n_ctx)

    # Seed the random-number generators
    if seed is not None:
        np.random.seed(seed)
        torch.random.manual_seed(seed)
        torch.cuda.manual_seed(seed)
    
    # Encode prompt (str -> tokens -> tensor(vocabulary))
    context_tokens = encode_transformer_xl(prompt, tokenizer, device)

    # Generate an output text (multiple times if (nsamples / batch_size) > 1)
    generated = 0
    for _ in range(nsamples // batch_size):
        out = sample_sequence(
            model=model, length=length,
            context=context_tokens,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k, device=device, sample=sample,
        )
        print(f'PROMPT: {prompt}')
        out = out[:, len(context_tokens):].tolist()
        for i in range(batch_size):
            generated += 1
            text = enc.decode(out[i])
            print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
            end = text.find(EOT)
            end = len(text) if end == -1 else end+len(EOT)
            print(text[:end])
    print("=" * 80)
    


In [35]:
seed = 0
np.random.seed(seed)
torch.random.manual_seed(seed)
torch.cuda.manual_seed(seed)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_name_or_path = 'transfo-xl-wt103'
tokenizer = TransfoXLTokenizer.from_pretrained(model_name_or_path)
model = TransfoXLLMHeadModel.from_pretrained(model_name_or_path)
model.to(device)
model.eval()

TransfoXLLMHeadModel(
  (transformer): TransfoXLModel(
    (word_emb): AdaptiveEmbedding(
      (emb_layers): ModuleList(
        (0): Embedding(20000, 1024)
        (1): Embedding(20000, 256)
        (2): Embedding(160000, 64)
        (3): Embedding(67735, 16)
      )
      (emb_projs): ParameterList(
          (0): Parameter containing: [torch.FloatTensor of size 1024x1024]
          (1): Parameter containing: [torch.FloatTensor of size 1024x256]
          (2): Parameter containing: [torch.FloatTensor of size 1024x64]
          (3): Parameter containing: [torch.FloatTensor of size 1024x16]
      )
    )
    (drop): Dropout(p=0.1)
    (layers): ModuleList(
      (0): RelPartialLearnableDecoderLayer(
        (dec_attn): RelPartialLearnableMultiHeadAttn(
          (qkv_net): Linear(in_features=1024, out_features=3072, bias=False)
          (drop): Dropout(p=0.1)
          (dropatt): Dropout(p=0.0)
          (o_net): Linear(in_features=1024, out_features=1024, bias=False)
          (laye

In [9]:
model.config

{
  "adaptive": true,
  "attn_type": 0,
  "clamp_len": 1000,
  "cutoffs": [
    20000,
    40000,
    200000
  ],
  "d_embed": 1024,
  "d_head": 64,
  "d_inner": 4096,
  "d_model": 1024,
  "div_val": 4,
  "dropatt": 0.0,
  "dropout": 0.1,
  "ext_len": 0,
  "init": "normal",
  "init_range": 0.01,
  "init_std": 0.02,
  "mem_len": 1600,
  "n_head": 16,
  "n_layer": 18,
  "n_token": 267735,
  "pre_lnorm": false,
  "proj_init_std": 0.01,
  "same_length": true,
  "sample_softmax": -1,
  "tgt_len": 128,
  "tie_projs": [
    false,
    true,
    true,
    true
  ],
  "tie_weight": true,
  "untie_r": true
}

In [39]:
seed = 0
np.random.seed(seed)
torch.random.manual_seed(seed)
run_model('What do you know about Machine Learning and Natural Language Processing?', length=128)


  0%|          | 0/128 [00:00<?, ?it/s][A
  1%|          | 1/128 [00:02<05:15,  2.49s/it][A
  2%|▏         | 2/128 [00:04<05:00,  2.38s/it][A
  2%|▏         | 3/128 [00:06<04:46,  2.30s/it][A
  3%|▎         | 4/128 [00:08<04:36,  2.23s/it][A
  4%|▍         | 5/128 [00:10<04:31,  2.21s/it][A
  5%|▍         | 6/128 [00:13<04:25,  2.18s/it][A
  5%|▌         | 7/128 [00:15<04:18,  2.14s/it][A
  6%|▋         | 8/128 [00:17<04:12,  2.11s/it][A
  7%|▋         | 9/128 [00:19<04:08,  2.08s/it][A
  8%|▊         | 10/128 [00:21<04:04,  2.07s/it][A
  9%|▊         | 11/128 [00:23<03:59,  2.05s/it][A
  9%|▉         | 12/128 [00:25<03:56,  2.04s/it][A
 10%|█         | 13/128 [00:27<03:54,  2.04s/it][A
 11%|█         | 14/128 [00:29<03:51,  2.03s/it][A
 12%|█▏        | 15/128 [00:31<03:48,  2.03s/it][A
 12%|█▎        | 16/128 [00:33<03:46,  2.02s/it][A
 13%|█▎        | 17/128 [00:35<03:44,  2.02s/it][A
 14%|█▍        | 18/128 [00:37<04:03,  2.21s/it][A
 15%|█▍        | 19/128 [00:4

do you know about Machine Learning and Natural Language <unk> , <eos> <eos> = = = = <unk> = = = = <eos> <eos> The first recorded instance of a <unk> was in the early 20th century , when a man named <unk> was arrested for stealing a horse from a horse . He was charged with theft and sentenced to six months in prison . <eos> <eos> = = = = <unk> = = = = <eos> <eos> The first recorded instance of a <unk> was in the early 20th century , when a man named <unk> was arrested for stealing a horse from a horse . He was sentenced to six months in prison and fined Â£ 1 @,@ 000 . <eos> <eos> = = = = <unk> = = = = <eos> <eos> The first


In [24]:
seed = 0
for seed in range(10):
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    run_model('What do you know about Machine Learning and Natural Language Processing?', length=128)

100%|██████████| 128/128 [00:09<00:00, 13.55it/s]
  0%|          | 0/128 [00:00<?, ?it/s]

 Isn't it remarkable how powerful it is to let your mind run free of everything that's changing around it?

We've worked at Machine Learning Hub for more than 20 years and have discovered that as a beginning along the way we are primed for processes that are indistinguishable from human behavior.

Our first example premiered at the exact moment in history when Norma Bolt, a templated data scientist with the former Company That Builds Everything, won first prize to shut her down. One year later Bolt decided to give Carlos Miller fiscal incentives to write an article on Machine Learning and left working on it (besides the Ask Me Anything


100%|██████████| 128/128 [00:10<00:00, 12.38it/s]
  1%|          | 1/128 [00:00<00:16,  7.85it/s]



If you're a teacher or teacher person and you take the time to learn more about Machine Learning Prediction Python written by three people smoking cigarettes: VThisuppolo Stefano aka 'teacher' and Asundri 'magic' Alibato

Published – May 22, 2018

PETROSOFT THUNDER

Artistic Attorney is one of the recent founders

developed by Pierre Tonkin Architects for the Jesuit university of Sepho on the outskirts of Groningen. The 42 year old TorboÍso Pascal reflects particularly on the process behind this project:

Pierre has been true to his


100%|██████████| 128/128 [00:09<00:00, 12.84it/s]
  1%|          | 1/128 [00:00<00:17,  7.27it/s]

 Student: Machine Learning Designers. Tarryl: Machine Learning Tools for Education Programs. Roger: Virtual Reality Knowledge Base.

4. CompuServe

Advantages:

- Awesome movies in online teacher education reviews.

- Much of their online sales are focused around student reviews.

- Page maintenance tools in user interfaces. Easy online assessment tool.

- Augmented class discussions for students

- Quality product reviews and maintainers that are extremely professional.

- Lots of pmsc or categories for students.

Many possibilities, for user becomes its own question particle patterns


100%|██████████| 128/128 [00:08<00:00, 14.74it/s]
  1%|          | 1/128 [00:00<00:15,  8.27it/s]



Machine learning doesn't just mean running checks to learn new evidence based on an experiment run. You also have to build a model that knows how much to be different. Machine learning can work inside your data and attack your data over time.

So how do you choose the right tool that you need, and what tools discourage you from using?

Learning and Training Is a 2nd Step to Thinking Pavlov's Big Ideas

Here's a short summary of what's being talked about on Machine Learning: you can read a lot more about Machine Learning before you sign up for DevOps or MySQL or Bootstrap business


100%|██████████| 128/128 [00:08<00:00, 15.71it/s]
  1%|          | 1/128 [00:00<00:14,  8.91it/s]




I bought MariaScript for Linux. It is extremely fast! It was ever using file system system when I was running under Windows, Python and other code in Windows, so oh yeah, here we go. It is not all human-readable PCI-Express, but it works and is pretty secure, it works as many Unix or Windows machines as it can, and I think it does it all pretty nice! There are numerous things Python hasn't yet learned, but I suspect it will provide has probably at least one of them. Probably around 16 or more frameworks that Alexandros Berskov will use in open source or he'll become


100%|██████████| 128/128 [00:09<00:00, 14.54it/s]
  1%|          | 1/128 [00:00<00:13,  9.46it/s]

 How do you use it in your applications? How do you enable it? 0 of 10 Next Get answers in a Quora question.

Natural Language Processing is the Processing of Data from One Medium by Jason Grossier

Daitsy writes: "The story of how artificial intelligence has come up with a very convincing model that predicts very heavily on our history: the path of a typical human home." That's an interesting line in the wrong direction. Only in 2017 have computer scientists quite managed to figure it out.

The year 2001 was the 'golden window' was set. Robin Kamms was the CIA director


100%|██████████| 128/128 [00:09<00:00, 14.16it/s]
  1%|          | 1/128 [00:00<00:14,  8.71it/s]

 You probably heard from many companies. Here are five that I realize are leading Silicon Valley companies that are leading these industries.

Step 1 – DevOps

Almost everyone I talk to who's considering jumping into the tech industry has stated their intent to start a take on DevOps. Creating and using Container Containerization for the IT industry is the most obvious example of CI — DevOps isn't about tracking down required employee iterations (".asp file info"File'dctlHelp", etc.) or cluttering up regular deploys to EOM's. But creating quick deployment pipelines for Docker containers is however the name. In your approach,


100%|██████████| 128/128 [00:08<00:00, 14.83it/s]
  1%|          | 1/128 [00:00<00:14,  8.90it/s]



Machine Learning engineers simply use Translate which is a data-rich algorithm, crafted to automate mathematical verification and store tens of billions of data streams in a secure database.

YOUR GLOBAL LEADER

How did your time at University Connect image manipulation workshop effort impact your skill set?

At Baccalaureate I found that we prioritized knowledge over experience & mentorship tenure, leadership, actual work, and keep it real. In addition to our many ancestors, our knowledgeable Chinese and Vietnamese colleagues learn through field education & talent exchange—contorically crafts that empower us to be as trustworthy as we


100%|██████████| 128/128 [00:09<00:00, 13.83it/s]
  1%|          | 1/128 [00:00<00:16,  7.89it/s]

 Let me know in the comments below!

Image credits: Wikimedia Commons , dboven, GPSImagesMagnet.com, iLibrary http://tsk<|endoftext|>NEW DELHI: India is facing a serious crisis over what can happen when it needs to cull 2,500 trainees once it stalks express train with a massive explosive discovered here late last month.

Experts say constraints imposed on trainmen by railway authorities and lawmakers have allowed the project to fail, leading to an "inability" to get train utilised in the next couple of weeks.

Officials point out that intrusive screening of element 500 used in Pakistan


100%|██████████| 128/128 [00:08<00:00, 15.26it/s]



Allen: Well, I first wrote about understanding machine learning after a really cold RSS feed that I've seen to the nearest 100,000 on Slack. Sort of back to hacker stuff. I've learned a lot about language driven AI through my experiences with AI modelling, how the big pin backpacks really work. Also, understanding the bigger problems is difficult but worth it so far. If you can print an entirely pre-programmed neural model and it turns out you can do pre-code which is interesting to know what you're doing with that sensor on your TV that the AI is chasing for.

And so claiming



