In [1]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [2]:
from transformers import AutoConfig
from transformers import GPT2LMHeadModel, GPT2Tokenizer
# load pre-trained gpt2-medium weights
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
# add our own special token to tokenizer and model
bos = '<|endoftext|>'
eos = '<|EOS|>'
body = '<|body|>'
special_tokens_dict = {'eos_token': eos, 'bos_token': bos, 'pad_token': '<pad>',
                       'sep_token': body}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
config = AutoConfig.from_pretrained('gpt2-medium', 
                                    bos_token_id=tokenizer.bos_token_id,
                                    eos_token_id=tokenizer.eos_token_id,
                                    pad_token_id=tokenizer.pad_token_id,
                                    sep_token_id=tokenizer.sep_token_id,
                                    output_hidden_states=False)
model = GPT2LMHeadModel.from_pretrained('gpt2-medium', config=config)
model.resize_token_embeddings(len(tokenizer))

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Embedding(50260, 1024)

In [3]:
#load dataset
import pandas as pd
df = pd.read_csv('QA.csv')
df

Unnamed: 0,Q,A
0,How can I unlock my iPhone if I forgot my pass...,You can unlock your iPhone by following the st...
1,What should I do if my iPhone is not charging?,"If your iPhone is not charging, try using a di..."
2,Can I get help with using a specific app from ...,"Yes, Apple customer support can offer assistan..."
3,How can I download the latest software updates...,You can download the latest software updates b...
4,Can I get a refund for an app or subscription ...,"Yes, you may be eligible for a refund for cert..."
...,...,...
573,"Okay, I'll do that now.","Once your phone is updated, download a reputab..."
574,"Hi, I accidentally deleted some important phot...","Yes, there is! Do you have a backup of your ph..."
575,"Yes, I do.","Great, you can restore your phone from the bac..."
576,"Hi, my iPhone won't turn on. What should I do?",I'm sorry to hear that! Let's try force restar...


In [4]:
# add start token, sep token and end token to join question text and answer text
prepare_text = lambda x: ' '.join([bos, x['Q'], body, x['A'], eos])
df['text'] = df.apply(prepare_text, axis=1)
df

Unnamed: 0,Q,A,text
0,How can I unlock my iPhone if I forgot my pass...,You can unlock your iPhone by following the st...,<|endoftext|> How can I unlock my iPhone if I ...
1,What should I do if my iPhone is not charging?,"If your iPhone is not charging, try using a di...",<|endoftext|> What should I do if my iPhone is...
2,Can I get help with using a specific app from ...,"Yes, Apple customer support can offer assistan...",<|endoftext|> Can I get help with using a spec...
3,How can I download the latest software updates...,You can download the latest software updates b...,<|endoftext|> How can I download the latest so...
4,Can I get a refund for an app or subscription ...,"Yes, you may be eligible for a refund for cert...",<|endoftext|> Can I get a refund for an app or...
...,...,...,...
573,"Okay, I'll do that now.","Once your phone is updated, download a reputab...","<|endoftext|> Okay, I'll do that now. <|body|..."
574,"Hi, I accidentally deleted some important phot...","Yes, there is! Do you have a backup of your ph...","<|endoftext|> Hi, I accidentally deleted some ..."
575,"Yes, I do.","Great, you can restore your phone from the bac...","<|endoftext|> Yes, I do. <|body|> Great, you ..."
576,"Hi, my iPhone won't turn on. What should I do?",I'm sorry to hear that! Let's try force restar...,"<|endoftext|> Hi, my iPhone won't turn on. Wha..."


In [5]:
# fine tune GPT2 model
from sklearn.model_selection import train_test_split
from datasets import Dataset
from transformers import TrainingArguments, DataCollatorForLanguageModeling, Trainer
df_train, df_val = train_test_split(df, train_size = 0.9, random_state = 77)
train_dataset = Dataset.from_pandas(df_train[['text']])
val_dataset = Dataset.from_pandas(df_val[['text']])
def tokenize_function(examples):
        return tokenizer(examples['text'], padding=True)
tokenized_train_dataset = train_dataset.map(
    tokenize_function,
    batched=True,
    num_proc=1
)
tokenized_val_dataset = val_dataset.map(
    tokenize_function,
    batched=True,
    num_proc=1
)

model_articles_path = './GPT2'
training_args = TrainingArguments(
    output_dir=model_articles_path,          
    num_train_epochs=50,              
    per_device_train_batch_size=8,  
    per_device_eval_batch_size=8,   
    warmup_steps=200,                
    weight_decay=0.01,              
    logging_dir=model_articles_path,         
    prediction_loss_only=True,
    save_steps=10000,
    evaluation_strategy="steps",
    eval_steps=500
)
data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )
trainer = Trainer(
    model=model.to(device),                        
    args=training_args,                  
    data_collator=data_collator,
    train_dataset=tokenized_train_dataset,         
    eval_dataset=tokenized_val_dataset,           
    
)

Map:   0%|          | 0/520 [00:00<?, ? examples/s]

Map:   0%|          | 0/58 [00:00<?, ? examples/s]

In [6]:
trainer.train()



Step,Training Loss,Validation Loss
500,4.1741,1.048348
1000,0.2546,1.349135
1500,0.1771,1.3857
2000,0.1503,1.500523
2500,0.1369,1.554368
3000,0.1297,1.60567


TrainOutput(global_step=3250, training_loss=0.7823011991060698, metrics={'train_runtime': 945.8085, 'train_samples_per_second': 27.49, 'train_steps_per_second': 3.436, 'total_flos': 4999021707264000.0, 'train_loss': 0.7823011991060698, 'epoch': 50.0})

In [7]:
trainer.save_model()
tokenizer.save_pretrained(model_articles_path)

('./GPT2/tokenizer_config.json',
 './GPT2/special_tokens_map.json',
 './GPT2/vocab.json',
 './GPT2/merges.txt',
 './GPT2/added_tokens.json')

In [15]:
def generate_n_text_samples(model, tokenizer, input_text):
    text_ids = tokenizer(input_text, return_tensors = 'pt')
    text_ids = text_ids
    model = model
    # use model.generate to generate the responses
    generated_text_samples = model.generate(
        **text_ids, 
        max_length= 50,  
        num_return_sequences= 1,
        no_repeat_ngram_size= 2,
        repetition_penalty= 1.5,
        top_p= 0.92,
        temperature= .85,
        do_sample= True,
        top_k= 125,
        early_stopping= True
    )
    #decode the response from vocab index
    for t in generated_text_samples:
        text = tokenizer.decode(t, skip_special_tokens=True)
        return text


In [16]:
model_articles_path = './GPT2'
new_model = GPT2LMHeadModel.from_pretrained(model_articles_path)
new_tokenizer = GPT2Tokenizer.from_pretrained(model_articles_path)
bos = new_tokenizer.bos_token
eos = new_tokenizer.eos_token
sep = new_tokenizer.sep_token
q = 'How can I unlock my iPhone if I forgot my passcode?'
q_new = ' '.join([bos, q,  sep])
content = generate_n_text_samples(new_model, new_tokenizer, q_new)[len(q)+1:]
print(content)

You may be able to use iTunes or iCloud, depending on your device's brand and model.


In [18]:
print("Chatbot: Hello, how can I help you?")
q = input("User:")
while q != "exit":
    # add start token in front of user input and sep token at the end of user input
    q_new = ' '.join([bos, q,  sep])
    #start generating responses
    content = generate_n_text_samples(new_model, new_tokenizer, q_new)
    content = content[len(q)+1:]
    print(f"Chatbot:{content}")
    q = input("User:")

Chatbot: Hello, how can I help you?


User: How can I troubleshoot issues with my Apple Watch not charging?


Chatbot:You may need to try resetting the device or checking for software updates.


User: Can I get help with setting up and using the Wallet app on my iPhone or iPad?


Chatbot:Yes, Apple customer support can provide assistance in settingsupandusingtheWalletappon youriPhoneoripad to store all of its data.


User: How can I troubleshoot issues with my iPhone not charging or charging slowly?


Chatbot:You may need to try a different charger and cable, check the case of your device for any dirt that might be obstructing it, as well.


User: How can I troubleshoot issues with my Apple Watch not updating or installing software?


Chatbot:You may be able to check your settings and storage by going through the "Settings" app onyour iPhone, selecting iCloud from there.


User: How can I troubleshoot issues with my Mac computer not playing sound or having audio problems?


Chatbot:You may be able to try resetting the volume and settings on your Apple TV, checking for software updates available through that provider's website.


User: How can I troubleshoot issues with my Mac computer not playing sound or having audio problems?


Chatbot:You may be able to try resetting the volume and settings on your Apple TV, checking for any software updates available through its website, as well


User: How can I troubleshoot issues with my Mac computer not playing sound or having audio problems?


Chatbot:You may be able to try resetting the PRAM and SMC, checking for any hardware damage. You also might need a different PROM


User: Can I get help with setting up and using the Safari browser on my Mac computer or iPhone/iPad?


Chatbot:Yes, Apple customer support can provide assistance accessing various features of your macOS operating system, such as updating software sources.


User: Can I get help with setting up and using the Safari browser on my Mac computer or iPhone/iPad?


Chatbot:Yes, Apple customer support can provide assistance in settingsupandusingthesafaribrowseron yourmaccomputeroriphone/#setting


User: Can I get help with setting up and using the Messages app on my Mac computer or iPhone?


Chatbot:Yes, Apple customer support can provide assistance through settings & messaging apps.


User: How can I troubleshoot issues with my Mac computer not playing or recognizing my DVDs or CDs?


Chatbot:You may be able to try following the steps outlined onthe Apple support website, such as checking them for updates and cleaning.


User: Can I get help with setting up and using the Siri feature on my Mac computer or iPhone?


Chatbot:Yes, Apple customer support can provide assistance in settingsupandusingtheSiriFeatureOnYourMacComputerOriPhone to use voice commands & control


User: How can I troubleshoot issues with my iPhone camera not working or taking blurry photos?


Chatbot:You may need to try resetting your device, checking for software updates and cleaning the lens.


User: How can I troubleshoot issues with my iPad not charging or turning on?


Chatbot:You may be able to try resetting the device by holding down both power and home buttons simultaneously until Apple Music app appears.


User: How can I troubleshoot issues with my iPad not charging or turning on?


Chatbot:You may need to try a different power source, connect it properly and then erase the data.


User: How can I troubleshoot issues with my iPad not charging or turning on?


Chatbot:You may be able to try resetting the device by holding down and power button for 15 seconds at a time.


User: exit
