In [15]:
from transformers import pipeline
import pandas as pd
import csv
import re

In [19]:
# Create the empty CSV file that completed forms are appended to.
# The header must use exactly the same names as the keys of `payload`
# ("Email Id", not "Email"): rows are written with csv.DictWriter using the
# payload keys and read back with csv.DictReader, so a mismatched column name
# makes the stored rows never compare equal to a payload.
data = pd.DataFrame(
    columns=[
        "Person Name",
        "Date of birth",
        "Email Id",
        "Education degree",
        "Location address",
        "Mobile Number",
    ]
)
data.to_csv("info.csv", index=False)

In [20]:
# Build the question-answering pipeline (transformers) that is queried with
# each slot name as the question and the user's message as the context.
pipe = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)

# Information slots the bot has to fill. Every slot starts out as None, which
# is how the conversation loop recognises a slot as still unanswered.
payload = dict.fromkeys(
    (
        "Person Name",
        "Date of birth",
        "Email Id",
        "Education degree",
        "Location address",
        "Mobile Number",
    )
)

# At start-up every slot is missing, in the same order as the payload keys.
missing_info = list(payload)

def extract_information(text, missing_info):
    """Fill still-empty slots of the global ``payload`` from ``text``.

    For every slot name in ``missing_info``:

    * ``'Email Id'`` is extracted with a regular expression; the first
      e-mail address found in ``text`` is stored.
    * every other slot is sent to the global question-answering ``pipe`` with
      the slot name as the question and ``text`` as the context.  The answer
      is stored when its score is at least 0.05, or regardless of score when
      fewer than two slots are still missing.

    Args:
        text: The raw message typed by the user.
        missing_info: Names of the ``payload`` slots that are still unfilled.

    Returns:
        None. Results are written into the module-level ``payload`` dict.
    """
    # Nothing can be extracted from blank input, so skip the model call.
    if not text or not text.strip():
        return

    # Pattern for a valid e-mail address.  The top-level-domain class is
    # ``[A-Za-z]``: inside a character class ``|`` is a literal pipe, not an
    # alternation, so it must not appear there.
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,7}\b'

    for question in missing_info:
        if question == 'Email Id':
            match = re.search(email_pattern, text)
            # No address in this message: leave the slot empty so the bot
            # asks for it again.
            if match is not None:
                payload[question] = match.group(0)
            continue

        answer = pipe(question=question, context=text)
        # A low-confidence answer is discarded while two or more slots are
        # still open; it is accepted only when at most one slot remains.
        if answer['score'] < 0.05 and len(missing_info) >= 2:
            continue
        payload[question] = answer['answer']


# Start a conversation loop.
# Each user message is mined for the slots that are still missing.  Once every
# slot in `payload` is filled, the form is appended to info.csv (unless an
# identical row is already stored) and the slots are reset for the next form.
print("Bot: Hello!, Please give me some information about you.")
while True:
    user_input = input("\nUser: ")
    command = user_input.lower()
    if command == "exit":
        break
    if command == "continue":
        print("\n\nBot: Hello!, Please give me some information about you.")
        continue

    # Try to fill the slots that are still empty from this message
    extract_information(user_input, missing_info)

    # Check for missing information
    missing_info = [key for key, value in payload.items() if value is None]
    if missing_info:
        print("\nBot: Thanks!")
        if 'Person Name' in missing_info:
            print("Bot: Hi, may I know: {}".format(', '.join(missing_info)))
        else:
            print("Bot: Hi, may I know your: {}".format(', '.join(missing_info)))
        continue

    # The form is complete.  Load the rows already stored in the CSV file,
    # skipping the header line.
    with open("info.csv", mode='r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)
        existing_rows = list(reader)

    # Compare by column values rather than by dict equality so the duplicate
    # check does not depend on the CSV header text matching the payload keys.
    new_row = list(payload.values())
    if new_row not in existing_rows:
        # Append the dictionary as a row to the CSV file
        with open("info.csv", mode='a', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=payload.keys())
            writer.writerow(payload)
        print("Bot: Congratulation!, your form is filled.\n")
    else:
        print("Bot: Your form is already filled!!!!!!.\n")

    # Reset every slot to None for the next form, and reset the list of
    # missing slots with it; otherwise the next message would be processed
    # with an empty `missing_info` and nothing would be extracted from it.
    payload = dict.fromkeys(payload)
    missing_info = list(payload)
    print("Bot: Thanks!\n")
    print("Bot: You can exit by typing 'exit'")
    print("Bot: You can continue with another form by typing 'continue'")

Bot: Hello!, Please give me some information about you.

User: Hello, I'm Ravi Kumar and I live at 123 Main Street, Anytown, CA 12345. I have a PhD in Computer Science from Stanford University. My mobile number is 123-456-7890

Bot: Thanks!
Bot: Hi, may I know your: Date of birth, Email Id

User: date of birth is 06/06/1999 and email is rohan.kumar@gmail.com
Bot: Congratulation!, your form is filled.

Bot: Thanks!

Bot: You can exit by typing 'exit'
Bot: You can continue with another form by typing 'continue'

User: exit
