In [2]:
import tensorflow as tf
import tensorflow_hub as hub
from transformers import BertTokenizer

In [35]:
"""Question Answering"""

def question_answer(question, reference):
    """Finds a snippet of text within a reference document to answer
        a question.

        Uses the bert-uncased-tf2-qa model from the tensorflow-hub library
        and the pre-trained BertTokenizer,
        bert-large-uncased-whole-word-masking-finetuned-squad, from the
        transformers library.

        question is a string containing the question to answer.
        reference is a string containing the reference document from which to
            find the answer.
        Returns: a string containing the answer, or None if no answer is
            found (blank question, blank reference, or an empty predicted
            span)."""
    # Nothing to ask or nothing to search: bail out before the expensive
    # model/tokenizer load.
    if not question or not question.strip():
        return None
    if not reference or not reference.strip():
        return None

    model = hub.load('https://tfhub.dev/see--/bert-uncased-tf2-qa/1')
    tokenizer = BertTokenizer.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')

    # Token layout: [CLS] question [SEP] reference [SEP]
    # NOTE(review): the pair is not truncated here; confirm the model accepts
    # sequences as long as the reference documents in use.
    input_ids = tokenizer.encode(question, reference)
    input_mask = [1] * len(input_ids)

    # Segment 0 covers [CLS], the question and the first [SEP]; segment 1
    # covers the reference and the closing [SEP]. The separator position is
    # looked up once instead of once per token.
    first_sep = input_ids.index(tokenizer.sep_token_id)
    input_type_ids = ([0] * (first_sep + 1) +
                      [1] * (len(input_ids) - first_sep - 1))

    # Add the batch dimension expected by the model.
    input_ids = tf.constant([input_ids])
    input_mask = tf.constant([input_mask])
    input_type_ids = tf.constant([input_type_ids])

    outputs = model([input_ids, input_mask, input_type_ids])
    # Position 0 ([CLS]) is excluded from the argmax; +1 undoes that shift.
    start_index = tf.argmax(outputs[0][0][1:]) + 1
    # +1 undoes the shift and +1 more makes the slice end exclusive.
    end_index = tf.argmax(outputs[1][0][1:]) + 2

    answer_tokens = tokenizer.convert_ids_to_tokens(
        input_ids[0][start_index:end_index])
    answer = tokenizer.convert_tokens_to_string(answer_tokens)

    # An end position before the start yields an empty span: no answer.
    if not answer or answer == '[CLS]':
        return None

    return answer

In [19]:
# Base data directory (a filesystem path, despite the "url" in the name);
# presumably the Colab-mounted Google Drive -- confirm.
# NOTE(review): the other cells visible here spell out their paths in full
# and never read this variable -- confirm it is still needed.
file_url = '/content/drive/MyDrive/Colab Notebooks/data'

In [36]:
# question_answer = __import__('0-qa').question_answer

# Read the article explicitly as UTF-8 (the same encoding semantic_search
# uses for this corpus) instead of relying on the platform default.
with open('/content/drive/MyDrive/Colab Notebooks/data/ZendeskArticles/PeerLearningDays.md',
          encoding='utf-8') as f:
    reference = f.read()

print(question_answer('When are PLDs?', reference))

on - site days from 9 : 00 am to 3 : 00 pm


In [1]:
"""Create the loop"""

# Bare prompt loop: every input gets an empty "A: " reply until the user
# types one of the farewell words (case-insensitive).
while True:
    user_input = input('Q: ')
    if user_input.lower() not in ('exit', 'quit', 'goodbye', 'bye'):
        print('A: ')
        continue
    print('A: Goodbye')
    break


Q: a
A: 
Q: hdeohoe
A: 
Q: How
A: 
Q: exit
A: Goodbye


In [31]:
"""Answer Questions"""

def answer_loop(reference):
    """Answers questions from a reference text in an interactive loop.

        Prompts with 'Q: ' and prints each reply prefixed with 'A: '.
        Typing exit, quit, goodbye or bye (any case, surrounding whitespace
        ignored) prints 'A: Goodbye' and ends the loop.

        reference is the reference text.
        If the answer cannot be found in the reference text (blank question
        or empty predicted span), respond with
        Sorry, I do not understand your question.
        Returns: None."""
    # Loaded once, outside the loop, so each question only pays for inference.
    model = hub.load('https://tfhub.dev/see--/bert-uncased-tf2-qa/1')
    tokenizer = BertTokenizer.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')

    while True:
        question = input('Q: ')
        if question.strip().lower() in ("exit", "quit", "goodbye", "bye"):
            print("A: Goodbye")
            break

        # The model is forced to pick a span, so a blank question would
        # still produce a (meaningless) answer; reject it up front.
        if not question.strip():
            print('A: Sorry, I do not understand your question.')
            continue

        # Token layout: [CLS] question [SEP] reference [SEP]
        input_ids = tokenizer.encode(question, reference)
        input_mask = [1] * len(input_ids)

        # Segment 0 covers [CLS], the question and the first [SEP];
        # segment 1 covers the reference and the closing [SEP]. The
        # separator position is looked up once instead of once per token.
        first_sep = input_ids.index(tokenizer.sep_token_id)
        input_type_ids = ([0] * (first_sep + 1) +
                          [1] * (len(input_ids) - first_sep - 1))

        # Add the batch dimension expected by the model.
        input_ids = tf.constant([input_ids])
        input_mask = tf.constant([input_mask])
        input_type_ids = tf.constant([input_type_ids])

        outputs = model([input_ids, input_mask, input_type_ids])
        # Position 0 ([CLS]) is excluded from the argmax; +1 undoes the shift.
        start_index = tf.argmax(outputs[0][0][1:]) + 1
        # +1 undoes the shift and +1 more makes the slice end exclusive.
        end_index = tf.argmax(outputs[1][0][1:]) + 2

        answer_tokens = tokenizer.convert_ids_to_tokens(
            input_ids[0][start_index:end_index])
        answer = tokenizer.convert_tokens_to_string(answer_tokens)

        # An end position before the start yields an empty span: no answer.
        if answer:
            print('A: ' + answer)
        else:
            print('A: Sorry, I do not understand your question.')

In [32]:
# answer_loop = __import__('2-qa').answer_loop

# Read the article explicitly as UTF-8 (the same encoding semantic_search
# uses for this corpus) instead of relying on the platform default.
with open('/content/drive/MyDrive/Colab Notebooks/data/ZendeskArticles/PeerLearningDays.md',
          encoding='utf-8') as f:
    reference = f.read()

answer_loop(reference)

Q: When are PLDs?
A: on - site days from 9 : 00 am to 3 : 00 pm
Q: What are Mock Interviews?
A: Sorry, I do not understand your question.
Q: What does PLD stand for?
A: peer learning days
Q: EXIT
A: Goodbye


In [7]:
import numpy as np
import os
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [21]:
"""Semantic Search"""

def semantic_search(corpus_path, sentence):
    """Performs semantic search on a corpus of documents.

        Every '.md' file directly inside corpus_path is read as UTF-8,
        the documents and the query are embedded with TF-IDF, and the
        document with the highest cosine similarity to the query is returned.

        corpus_path is the path to the corpus of reference documents on which
            to perform semantic search.
        sentence is the sentence from which to perform semantic search.
        Returns: the reference text of the document most similar to
            sentence.
        Raises: ValueError if corpus_path contains no '.md' documents."""
    documents = []
    # os.listdir returns entries in arbitrary order; sorting makes ties in
    # similarity resolve to the same document on every run.
    for filename in sorted(os.listdir(corpus_path)):
        if filename.endswith('.md'):
            with open(os.path.join(
                    corpus_path, filename), 'r', encoding='utf-8') as file:
                documents.append(file.read())

    # Fail early with a clear message instead of an opaque error from the
    # similarity computation on an empty matrix.
    if not documents:
        raise ValueError(
            'no .md documents found in {!r}'.format(corpus_path))

    # The query is vectorized together with the corpus so both share one
    # vocabulary; it occupies the last row of the matrix.
    vectorizer = TfidfVectorizer()
    embeddings = vectorizer.fit_transform(documents + [sentence])

    # Shape (1, number of documents): query row against every document row.
    similarities = cosine_similarity(embeddings[-1:], embeddings[:-1])

    most_similar_index = int(similarities.argmax())

    return documents[most_similar_index]

In [22]:
# semantic_search = __import__('3-semantic_search').semantic_search

# Show the full text of the article that best matches the query.
print(
    semantic_search(
        corpus_path='/content/drive/MyDrive/Colab Notebooks/data/ZendeskArticles',
        sentence='When are PLDs?',
    )
)

PLD Overview
Peer Learning Days (PLDs) are a time for you and your peers to ensure that each of you understands the concepts you've encountered in your projects, as well as a time for everyone to collectively grow in technical, professional, and soft skills. During PLD, you will collaboratively review prior projects with a group of cohort peers.
PLD Basics
PLDs are mandatory on-site days from 9:00 AM to 3:00 PM. If you cannot be present or on time, you must use a PTO. 
No laptops, tablets, or screens are allowed until all tasks have been whiteboarded and understood by the entirety of your group. This time is for whiteboarding, dialogue, and active peer collaboration. After this, you may return to computers with each other to pair or group program. 
Peer Learning Days are not about sharing solutions. This doesn't empower peers with the ability to solve problems themselves! Peer learning is when you share your thought process, whether through conversation, whiteboarding, debugging, or li

In [23]:
"""Multi-reference Question Answering"""
def question_answer(corpus_path):
    """Answers questions from multiple reference texts in an interactive
        loop.

        For every question, semantic_search picks the most similar document
        in corpus_path and the BERT QA model extracts the answer from it.
        Prompts with 'Q: ' and prints each reply prefixed with 'A: '.
        Typing exit, quit, goodbye or bye (any case, surrounding whitespace
        ignored) prints 'A: Goodbye' and ends the loop. A blank question or
        an empty predicted span is answered with
        Sorry, I do not understand your question.

        corpus_path is the path to the corpus of reference documents.
        Returns: None."""
    # NOTE: this rebinds the name question_answer, which an earlier cell of
    # this notebook defines with a (question, reference) signature.
    model = hub.load('https://tfhub.dev/see--/bert-uncased-tf2-qa/1')
    tokenizer = BertTokenizer.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')

    while True:
        question = input('Q: ')
        if question.strip().lower() in ("exit", "quit", "goodbye", "bye"):
            print("A: Goodbye")
            break

        # The model is forced to pick a span, so a blank question would
        # still produce a (meaningless) answer; reject it before searching.
        if not question.strip():
            print('A: Sorry, I do not understand your question.')
            continue

        # Pick the document to read the answer from.
        reference = semantic_search(corpus_path, question)

        # Token layout: [CLS] question [SEP] reference [SEP]
        input_ids = tokenizer.encode(question, reference)
        input_mask = [1] * len(input_ids)

        # Segment 0 covers [CLS], the question and the first [SEP];
        # segment 1 covers the reference and the closing [SEP]. The
        # separator position is looked up once instead of once per token.
        first_sep = input_ids.index(tokenizer.sep_token_id)
        input_type_ids = ([0] * (first_sep + 1) +
                          [1] * (len(input_ids) - first_sep - 1))

        # Add the batch dimension expected by the model.
        input_ids = tf.constant([input_ids])
        input_mask = tf.constant([input_mask])
        input_type_ids = tf.constant([input_type_ids])

        outputs = model([input_ids, input_mask, input_type_ids])
        # Position 0 ([CLS]) is excluded from the argmax; +1 undoes the shift.
        start_index = tf.argmax(outputs[0][0][1:]) + 1
        # +1 undoes the shift and +1 more makes the slice end exclusive.
        end_index = tf.argmax(outputs[1][0][1:]) + 2

        answer_tokens = tokenizer.convert_ids_to_tokens(
            input_ids[0][start_index:end_index])
        answer = tokenizer.convert_tokens_to_string(answer_tokens)

        # An end position before the start yields an empty span: no answer.
        if answer:
            print('A: ' + answer)
        else:
            print('A: Sorry, I do not understand your question.')

In [24]:
# question_answer = __import__('4-qa').question_answer

# Start the interactive multi-document Q&A session over the article corpus.
question_answer(
    corpus_path='/content/drive/MyDrive/Colab Notebooks/data/ZendeskArticles',
)

Q: When are PLDs?
A: on - site days from 9 : 00 am to 3 : 00 pm
Q: What are Mock Interviews?
A: help you train for technical interviews
Q: What does PLD stand for?
A: stand up
Q: goodbye
A: Goodbye
