In [None]:
from wrappedCode.createModel import *
from wrappedCode.encryptionWrapped import *
from wrappedCode.evaluationModelHelpers import *

##


In [None]:
import matplotlib.pyplot as plt


## Model selection

Several language models can be used in this example, so one has to be selected first. For GPT-2, the following variants are available:


*   "gpt2-small"
*   "gpt2-medium"
*   "gpt2-large"
*   "gpt2-xl"

BERT and RoBERTa can be selected as well.




In [None]:
# Load the GPT-2 model and its tokenizer; both are reused by every cell below.
mod, tok = buildModelGPT(modelType="gpt2-large")

## Encryption of the secret text

The secret text can be encrypted using either complete or incomplete sentences. In this example, the opening of Adele's "Hello" is encrypted.
The `sentenceComplete` flag controls whether the last sentence should be completed during encryption.



In [None]:
# Prefix passed to the model as the preconditioning of the secret.
precondSec = "Secret: "

# Opening words of the cover text; needed again for decryption.
startOfText = "This year's Shakespeare Festival"

# Whether the last sentence should be completed during encryption.
sentenceComplete = True

# The message to hide.
secret = """Hello, it's me
I was wondering if after all these years you'd like to meet
To go over everything
They say that time's supposed to heal ya
But I ain't done much healing
Hello, can you hear me?
I'm in California dreaming about who we used to be
When we were younger and free
I've forgotten how it felt before the world fell at our feet
There's such a difference between us
And a million miles
Hello from the other side
I must've called a thousand times
To tell you I'm sorry for everything that I've done
But when I call, you never seem to be home"""

In [None]:
# Encrypt `secret`; `outText` is the text handed to the decryption step.
outText, outInd = encryptMessage(
    mod,
    tok,
    secret,
    precondSec,
    startOfText,
    completeSentence=sentenceComplete,
)

## Decryption of the cover text

For decryption, the receiver needs to know the preconditioning of the secret and the start of the text. Given these, and knowing whether sentence completion was activated, the secret text can be recovered correctly.



In [None]:
# Decrypt with the same sentence-completion setting that was used for
# encryption; a hard-coded flag would silently go out of sync as soon as
# `sentenceComplete` is changed in the configuration cell.
# NOTE(review): assumes the last positional argument of getTextFromText is the
# sentence-completion flag — confirm against its signature.
print(getTextFromText(mod, tok, outText, precondSec, startOfText, sentenceComplete))

## Evaluation 

### Smoothness of the generated text

In [None]:
def plot_ranks_bert(mod, tok, precondSec, secret):
  """Plot the values returned by ``get_ranks`` for ``secret`` as an orange line.

  The x axis has one point per token of ``secret`` as encoded by ``tok``
  without special tokens; the y axis is fixed to the range [-1000, 50000].
  """
  n_tokens = len(tok.encode(secret, add_special_tokens=False))
  token_ranks = get_ranks(mod, tok, precondSec, secret)
  plt.plot(range(n_tokens), token_ranks, color='orange')
  plt.ylim(-1000, 50000)
  plt.show()


def plot_ranks_gpt2(model_gpt, tok_gpt, precondSec, secret):
  """Plot the values returned by ``getSecretRanks`` for ``secret`` as an orange line.

  The x axis has one point per token of ``secret`` as encoded by ``tok_gpt``;
  the y axis is fixed to the range [-1000, 50000].
  """
  n_tokens = len(tok_gpt.encode(secret))
  token_ranks = getSecretRanks(model_gpt, tok_gpt, secret, precondSec)
  plt.plot(range(n_tokens), token_ranks, color='orange')
  plt.ylim(-1000, 50000)
  plt.show()

# The model in this notebook is built with buildModelGPT, so the GPT-2 rank plot
# applies. The previous dispatch relied on names that are never defined in this
# notebook (GPT2_, BERT_, model_gpt, tok_gpt, secret_text) and therefore failed
# on a fresh kernel.
# If a BERT/RoBERTa model is loaded into `mod`/`tok` instead, call
# plot_ranks_bert(mod, tok, precondSec, secret) here.
plot_ranks_gpt2(mod, tok, precondSec, secret)

### Perplexity score

In [None]:
def get_perplex_score(cover_text, model, tokenizer, startingSecret=". "):
    """Compute the perplexity of ``cover_text`` under ``model``.

    The text is scored token by token: the model is fed the tokens of
    ``startingSecret`` followed by all cover tokens scored so far, and the
    probability it assigns to the next cover token is recorded. The result is
    ``2 ** (-mean(log2 p_i))`` over all cover tokens.

    Args:
        cover_text: Text to score.
        model: Torch module called as ``model(ids)``; element ``[0]`` of its
            output is indexed as ``[batch, position, vocab]`` logits.
        tokenizer: Object whose ``encode(str)`` returns a list of token ids.
        startingSecret: Text whose tokens form the initial context.

    Returns:
        The perplexity as a NumPy float; lower means the text is more
        predictable for the model.

    Raises:
        ValueError: If ``cover_text`` encodes to zero tokens.

    Side effects:
        Moves ``model`` to the GPU when CUDA is available, otherwise to the CPU.
    """
    token_secret = tokenizer.encode(cover_text)
    if not token_secret:
        raise ValueError("cover_text encodes to no tokens; perplexity is undefined")
    token_start = tokenizer.encode(startingSecret)

    # Use the GPU when there is one, but keep working on CPU-only machines.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    context = torch.tensor([token_start], dtype=torch.long, device=device)

    log2_probs = []
    with torch.no_grad():
        for token_id in token_secret:
            # Score the token BEFORE it becomes part of the context, so each
            # probability is conditioned only on the preceding tokens.
            last_logits = model(context)[0][0, -1, :]
            # log_softmax stays finite where softmax followed by log would
            # underflow to log(0) = -inf.
            log_probs = torch.log_softmax(last_logits, dim=0)
            log2_probs.append(log_probs[token_id].item() / np.log(2))
            scored = torch.tensor([[token_id]], dtype=torch.long, device=device)
            context = torch.cat((context, scored), dim=1)

    return np.exp2(-np.mean(log2_probs))