In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

In [11]:
# use generate function on gpt2 model
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")


prompt = "It is important for all countries to try harder to reduce carbon emissions because"
input_ids = tokenizer.encode(prompt, return_tensors="pt")

# output is 30 tokens long and print probability of each token
output = model.generate(input_ids, max_length=30, do_sample=False, top_k=1, top_p=0.01, temperature=0.9, num_return_sequences=1, return_dict_in_generate=True, output_scores=True)
tokenizer.batch_decode(output.sequences, skip_special_tokens=True)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


['It is important for all countries to try harder to reduce carbon emissions because it is a key part of the global economy.\n\n"We need to']

In [12]:
print(output)

GreedySearchDecoderOnlyOutput(sequences=tensor([[1026,  318, 1593,  329,  477, 2678,  284, 1949, 7069,  284, 4646, 6588,
         8971,  780,  340,  318,  257, 1994,  636,  286,  262, 3298, 3773,   13,
          198,  198,    1, 1135,  761,  284]]), scores=(tensor([[-126.3008, -125.6421, -133.5102,  ..., -132.7888, -133.6604,
         -127.7045]]), tensor([[-132.1247, -130.5210, -138.0126,  ..., -140.9460, -139.2059,
         -132.6104]]), tensor([[-135.8139, -135.3814, -143.6343,  ..., -143.6512, -141.7850,
         -138.9233]]), tensor([[-147.2896, -145.7075, -151.7347,  ..., -157.4900, -153.2661,
         -148.2787]]), tensor([[-113.9601, -111.4267, -118.7147,  ..., -121.8256, -122.8286,
         -116.0324]]), tensor([[-61.5197, -57.9800, -64.9529,  ..., -70.8180, -72.1236, -62.2383]]), tensor([[-108.9445, -106.4659, -111.8005,  ..., -116.4430, -115.6365,
         -108.9539]]), tensor([[-140.2385, -137.1673, -141.4753,  ..., -145.5077, -148.0972,
         -139.7022]]), tensor([[-140

In [13]:
print(len(output.scores[:]))

16


In [14]:
# softmax of output scores
softmax = torch.nn.Softmax(dim=0)

In [22]:
for i in range(16):
	print(torch.argmax(softmax(output.scores[i][0])))
	print(max(softmax(output.scores[i][0])))

tensor(340)
tensor(0.1615)
tensor(318)
tensor(0.3339)
tensor(257)
tensor(0.1505)
tensor(1994)
tensor(0.0698)
tensor(636)
tensor(0.1504)
tensor(286)
tensor(0.9890)
tensor(262)
tensor(0.2642)
tensor(3298)
tensor(0.1169)
tensor(3773)
tensor(0.1967)
tensor(13)
tensor(0.3324)
tensor(198)
tensor(0.2185)
tensor(198)
tensor(0.9995)
tensor(1)
tensor(0.2831)
tensor(1135)
tensor(0.1403)
tensor(761)
tensor(0.2363)
tensor(284)
tensor(0.7871)


In [96]:
# store the top 10 tokens and their probabilities from output scores
top10 = []
for i in range(len(output.scores[:])):
	# softmax of output scores
	softmax = torch.nn.Softmax(dim=0)
	# get the top 10 tokens and their probabilities
	soft = softmax(output.scores[i][0])
	top10.append(torch.topk(soft, 3))
# print shape of top10
print(len(top10))


16


In [135]:
print(top10[2][1])

tensor([257, 262, 407])


In [9]:
import treelib
# get top3 tokens and their probabilities at each position for depth 3
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
prompt = "It is important for all countries to try harder to reduce carbon emissions because"
# softmax of output scores
softmax = torch.nn.Softmax(dim=0)
# tree creation
tree = treelib.Tree()
tree.create_node("root", "root")
for i in range(1):
	input_ids = tokenizer.encode(prompt, return_tensors="pt")
	# output is 30 tokens long and print probability of each token
	output = model.generate(input_ids, max_length=20, do_sample=False, top_k=1, top_p=0.1, temperature=0.9, num_return_sequences=1, return_dict_in_generate=True, output_scores=True)
	# get the top 10 tokens and their probabilities
	soft = softmax(output.scores[0][0])
	words = torch.topk(soft, 3)
	# print(words[1][0])
	word = tokenizer.batch_decode(words[1], skip_special_tokens=True)
	# word2 = tokenizer.batch_decode(words[1][, skip_special_tokens=True)
	# word3 = tokenizer.batch_decode(words[1][2], skip_special_tokens=True)
	for j in range(3):
		text = prompt + " " + word[j]
		# print(text)
		tree.create_node(word[j], "j"+str(j), parent="root", data=words[0][j])
		input_ids = tokenizer.encode(text, return_tensors="pt")
		# output is 30 tokens long and print probability of each token
		output = model.generate(input_ids, max_length=20, do_sample=False, top_k=1, top_p=0.1, temperature=0.9, num_return_sequences=1, return_dict_in_generate=True, output_scores=True)
		# get the top 10 tokens and their probabilities
		soft = softmax(output.scores[0][0])
		words_new = torch.topk(soft, 3)
		# print(words[1][0])
		word_new = tokenizer.batch_decode(words_new[1], skip_special_tokens=True)
		# word2 = tokenizer.batch_decode(words[1][, skip_special_tokens=True)
		# word3 = tokenizer.batch_decode(words[1][2], skip_special_tokens=True)
		for k in range(3):
			# print(t)
			tree.create_node(word_new[k], "k"+str(k)+"j"+str(j), parent="j"+str(j), data=words_new[0][k])
			text = prompt + " " + word[j] + " " + word_new[k]
			input_ids = tokenizer.encode(text, return_tensors="pt")
			# output is 30 tokens long and print probability of each token
			output = model.generate(input_ids, max_length=30, do_sample=False, top_k=1, top_p=0.1, temperature=0.9, num_return_sequences=1, return_dict_in_generate=True, output_scores=True)
			# get the top 10 tokens and their probabilities
			soft = softmax(output.scores[0][0])
			words_new2 = torch.topk(soft, 3)
			# print(words[1][0])
			word_new2 = tokenizer.batch_decode(words_new2[1], skip_special_tokens=True)
			# word2 = tokenizer.batch_decode(words[1][, skip_special_tokens=True)
			# word3 = tokenizer.batch_decode(words[1][2], skip_special_tokens=True)
			for l in range(3):
				# print(t)
				tree.create_node(word_new2[l], "l"+str(l)+"k"+str(k)+"j"+str(j), parent="k"+str(k)+"j"+str(j), data=words_new2[0][l])
				text = prompt + " " + word[j] + " " + word_new[k] + " " + word_new2[l]
				input_ids = tokenizer.encode(text, return_tensors="pt")
				# output is 30 tokens long and print probability of each token
				output = model.generate(input_ids, max_length=40, do_sample=False, top_k=1, top_p=0.1, temperature=0.9, num_return_sequences=1, return_dict_in_generate=True, output_scores=True)
				# get the top 10 tokens and their probabilities
				soft = softmax(output.scores[0][0])
				words_new3 = torch.topk(soft, 3)
				# print(words[1][0])
				word_new3 = tokenizer.batch_decode(words_new3[1], skip_special_tokens=True)
				# word2 = tokenizer.batch_decode(words[1][, skip_special_tokens=True)
				# word3 = tokenizer.batch_decode(words[1][2], skip_special_tokens=True)
				for m in range(3):
					# print(t)
					tree.create_node(word_new3[m], "m"+str(m)+"l"+str(l)+"k"+str(k)+"j"+str(j), parent="l"+str(l)+"k"+str(k)+"j"+str(j), data=words_new3[0][m])
					# text = prompt + " " + word[j] + " " + word_new[k] + " " + word_new2[l] + " " + word_new3[m]
					# input_ids = tokenizer.encode(text, return_tensors="pt")
					# # output is 30 tokens long and print probability of each token
					# output = model.generate(input_ids, max_length=40, do_sample=False, top_k=1, top_p=0.1, temperature=0.9, num_return_sequences=1, return_dict_in_generate=True, output_scores=True)
					# # get the top 10 tokens and their probabilities
					# soft = softmax(output.scores[0][0])
					# words_new4 = torch.topk(soft, 3)
					# # print(words[1][0])
					# word_new4 = tokenizer.batch_decode(words_new4[1], skip_special_tokens=True)
					# word2 = tokenizer.batch_decode(words[1][, skip_special_tokens=True)
					# word3 = tokenizer.batch_decode(words[1][2], skip_special_tokens=True)
# 					for n in range(3):
# 						# print(t)
# 						tree.create_node(word_new4[n], "n"+str(n)+"m"+str(m)+"l"+str(l)+"k"+str(k)+"j"+str(j), parent="m"+str(m)+"l"+str(l)+"k"+str(k)+"j"+str(j), data=words_new4[0][n])
# 						# text = prompt + " " + word[j] + " " + word_new[k] + " " + word_new2[l] + " " + word_new3[m] + " " + word_new4[n]
# 						# input_ids = tokenizer.encode(text, return_tensors="pt")
# 						# # output is 30 tokens long and print probability of each token
# 						# output = model.generate(input_ids, max_length=50, do_sample=False, top_k=1, top_p=0.1, temperature=0.9, num_return_sequences=1, return_dict_in_generate=True, output_scores=True)
# 						# # get the top 10 tokens and their probabilities
# 						# soft = softmax(output.scores[0][0])
# 						# words_new5 = torch.topk(soft, 3)
# 						# # print(words[1][0])
# 						# word_new5 = tokenizer.batch_decode(words_new5[1], skip_special_tokens=True)
# 						# # word2 = tokenizer.batch_decode(words[1][, skip_special_tokens=True)
# 						# # word3 = tokenizer.batch_decode(words[1][2], skip_special_tokens=True)
# 						# for o in range(3):
# 						# 	# print(t)
# 						# 	tree.create_node(word_new5[o], "o"+str(o)+"n"+str(n)+"m"+str(m)+"l"+str(l)+"k"+str(k)+"j"+str(j), parent="n"+str(n)+"m"+str(m)+"l"+str(l)+"k"+str(k)+"j"+str(j), data=words_new5[0][o])
# 						# 	text = prompt + " " + word[j] + " " + word_new[k] + " " + word_new2[l] + " " + word_new3[m] + " " + word_new4[n] + " " + word_new5[o]
# 						# 	input_ids = tokenizer.encode(text, return_tensors="pt")
# 						# 	# output is 30 tokens long and print probability of each token
# 						# 	output = model.generate(input_ids, max_length=60, do_sample=False
tree.show()


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

root
├──  it
│   ├──  can
│   │   ├──  be
│   │   │   ├──  a
│   │   │   ├──  difficult
│   │   │   └──  very
│   │   ├──  help
│   │   │   ├──  reduce
│   │   │   ├──  to
│   │   │   └──  us
│   │   └──  reduce
│   │       ├──  emissions
│   │       ├──  greenhouse
│   │       └──  the
│   ├──  is
│   │   ├──  a
│   │   │   ├──  key
│   │   │   ├──  major
│   │   │   └──  very
│   │   ├──  important
│   │   │   ├──  for
│   │   │   ├──  that
│   │   │   └──  to
│   │   └──  the
│   │       ├──  cheapest
│   │       ├──  most
│   │       └──  only
│   └──  will
│       ├──  be
│       │   ├──  a
│       │   ├──  difficult
│       │   └──  very
│       ├──  help
│       │   ├──  reduce
│       │   ├──  to
│       │   └──  us
│       └──  reduce
│           ├──  global
│           ├──  greenhouse
│           └──  the
├──  the
│   ├──  climate
│   │   ├──  change
│   │   │   ├──  
│   │   │   ├──  is
│   │   │   └──  will
│   │   ├──  crisis
│   │   │   ├──  
│   │   │   ├──  has
│   │   