# **1. Installing Packages**

In [None]:
!pip install faiss-cpu
!pip install sentence-transformers
!pip install google-generativeai
!pip install nltk
!pip install textstat
!pip install python-dotenv

# **2. Importing Libraries**

In [2]:
# Import necessary libraries
import nltk
import os
import re
import string
from collections import Counter
import faiss
import numpy as np
from dotenv import load_dotenv
from nltk.corpus import stopwords
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize, sent_tokenize
from sentence_transformers import SentenceTransformer
from textstat import flesch_kincaid_grade
import google.generativeai as genai
import warnings
from google.colab import userdata
import ipywidgets as widgets
from IPython.display import display, HTML
import tempfile
import matplotlib.pyplot as plt

# Silence all warnings for the rest of the session (no category/module filter is given).
warnings.filterwarnings("ignore")

# Configure the Google Generative AI API.
# The key is read from the Colab secrets store (google.colab.userdata) under the
# name 'GOOGLE_API_KEY' rather than being hard-coded in the notebook.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)


# **3. Downloading NLTK Data, Loading the Sentence Transformer Model and Creating the FAISS Index**

In [3]:
# Download required NLTK data into one explicit directory
nltk_data_dir = os.path.join(os.path.expanduser("~"), "nltk_data")
if nltk_data_dir not in nltk.data.path:
    # Guard so that re-running this cell does not append the same entry again
    nltk.data.path.append(nltk_data_dir)

# Tokenizer and POS-tagger resources, both stored in nltk_data_dir
nltk.download('punkt_tab', download_dir=nltk_data_dir)
nltk.download('averaged_perceptron_tagger_eng', download_dir=nltk_data_dir)

# Load Sentence Transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Create FAISS index. The dimension is taken from the loaded model (384 for
# 'all-MiniLM-L6-v2') so the index stays valid if the model name is changed.
index = faiss.IndexFlatL2(model.get_sentence_embedding_dimension())

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# **4. Grammatical Functions**

In [4]:
def read_file(uploaded_file):
    """Decode the raw bytes of an uploaded text file as UTF-8 and return the text."""
    return uploaded_file.decode("utf-8")

def preprocess_text(text):
    """Normalise transcript text.

    Removes single-line [bracketed] spans, lowercases the text, collapses every
    run of whitespace to one space and trims both ends.
    """
    without_brackets = re.sub(r'\[.*?\]', '', text)
    lowered = without_brackets.lower()
    return re.sub(r'\s+', ' ', lowered).strip()

def average_word_length(text):
    """Return the mean token length of `text`.

    Tokens come from `word_tokenize`, so every token it yields is counted.

    Returns:
        The average number of characters per token, or 0.0 when the text
        yields no tokens (instead of raising ZeroDivisionError).
    """
    words = word_tokenize(text)
    if not words:
        return 0.0
    return sum(len(word) for word in words) / len(words)

def punctuation_density(text):
    """Return the fraction of characters in `text` that are ASCII punctuation.

    Punctuation is defined by `string.punctuation`.

    Returns:
        punctuation characters / total characters, or 0.0 for empty text
        (instead of raising ZeroDivisionError).
    """
    if not text:
        return 0.0
    punctuation_count = sum(1 for char in text if char in string.punctuation)
    return punctuation_count / len(text)

def pos_density(text):
    """Return a mapping of part-of-speech tag -> share of tokens carrying that tag.

    The text is tokenised with `word_tokenize` and tagged with `pos_tag`; each
    tag's count is divided by the total number of tokens.
    """
    words = word_tokenize(text)

    tag_totals = Counter()
    for _, tag in pos_tag(words):
        tag_totals[tag] += 1

    word_count = len(words)
    densities = {}
    for tag, occurrences in tag_totals.items():
        densities[tag] = occurrences / word_count
    return densities

def sentence_complexity(text):
    """Return the average number of whitespace-separated words per sentence.

    Sentences come from `sent_tokenize`; words within a sentence are counted
    with `str.split()`.

    Returns:
        Mean words per sentence, or 0.0 when no sentences are found
        (instead of raising ZeroDivisionError).
    """
    sentences = sent_tokenize(text)
    if not sentences:
        return 0.0
    return sum(len(sentence.split()) for sentence in sentences) / len(sentences)

def repetition_ratio(text):
    """Return the share of tokens that repeat an earlier token.

    Computed as (total tokens - distinct tokens) / total tokens, using
    `word_tokenize` for tokenisation.

    Returns:
        A value in [0, 1), or 0.0 when the text yields no tokens
        (instead of raising ZeroDivisionError).
    """
    words = word_tokenize(text)
    if not words:
        return 0.0
    return (len(words) - len(set(words))) / len(words)

def dynamic_parameter_tracking(transcript_text):
    """Compute every linguistic metric for a transcript and return them in one dict.

    Keys: 'avg_word_len', 'punctuation_dens', 'pos_dens', 'sent_comp',
    'rep_ratio' and 'readability_score' (from `flesch_kincaid_grade`).
    The key names match the keyword parameters of
    `generate_score_and_justification`, so the dict can be splatted into it.
    """
    # Dict displays evaluate in source order, so the metric functions run in
    # the order listed here.
    return {
        'avg_word_len': average_word_length(transcript_text),
        'punctuation_dens': punctuation_density(transcript_text),
        'pos_dens': pos_density(transcript_text),
        'sent_comp': sentence_complexity(transcript_text),
        'rep_ratio': repetition_ratio(transcript_text),
        'readability_score': flesch_kincaid_grade(transcript_text),
    }

# **5. Graph Generation**

In [5]:
# CSS for the notebook's HTML output boxes.
# display_output() interpolates this string into every HTML fragment it renders:
#   .output-box - bordered, padded container around the whole result
#   .header     - bold green title line
# NOTE(review): display_output's score markup also uses the classes
# score-box, progress-container and progress-bar, which have no rules here.
style = """
<style>
    .output-box {
        border: 2px solid #4CAF50;
        padding: 10px;
        margin-top: 10px;
        border-radius: 5px;
        background-color: #f9f9f9;
    }
    .header {
        font-size: 18px;
        font-weight: bold;
        color: #4CAF50;
    }
</style>
"""


def generate_graphs(updated_parameters):
    """Draw a horizontal bar chart of the four scalar linguistic metrics.

    Args:
        updated_parameters: Mapping that must contain 'avg_word_len',
            'punctuation_dens', 'sent_comp' and 'rep_ratio'. Other keys are
            ignored.
    """
    # (axis label, parameter key) pairs, in bottom-to-top plotting order
    metric_keys = (
        ('Avg Word Length', 'avg_word_len'),
        ('Punctuation Density', 'punctuation_dens'),
        ('Sentence Complexity', 'sent_comp'),
        ('Repetition Ratio', 'rep_ratio'),
    )
    tick_labels = [label for label, _ in metric_keys]
    bar_lengths = [updated_parameters[key] for _, key in metric_keys]
    bar_positions = np.arange(len(tick_labels))

    plt.figure(figsize=(10, 6))
    plt.barh(bar_positions, bar_lengths, align='center', color='skyblue')
    plt.yticks(bar_positions, tick_labels)
    plt.xlabel('Values')
    plt.title('Sales Transcript Linguistic Metrics')

    # Render the figure
    plt.show()


def display_output(title, content, score=None):
    """Render a titled, styled HTML box in the notebook output.

    Args:
        title: Heading text shown at the top of the box.
        content: Body text, shown verbatim inside a <pre> element.
        score: Optional conversion score on a 0-100 scale. When given, a
            "Conversion Score" line and a proportional bar are rendered
            below the body. Omit it (the default) for a plain text box.
    """
    # Local import keeps this function self-contained within its cell.
    from html import escape

    progress_bar = ""
    if score is not None:
        # Clamp the bar width so out-of-range or non-numeric scores cannot
        # produce an invalid CSS width.
        try:
            bar_width = max(0.0, min(100.0, float(score)))
        except (TypeError, ValueError):
            bar_width = 0.0
        # Inline styles are used because the shared stylesheet has no rules
        # for these classes.
        progress_bar = f"""
        <div class="score-box">Conversion Score: {escape(str(score))}/100</div>
        <div class="progress-container" style="background-color: #ddd; border-radius: 5px;">
            <div class="progress-bar" style="width: {bar_width:g}%; height: 12px; border-radius: 5px; background-color: #4CAF50;"></div>
        </div>
        """

    # Title and content are escaped: they may contain model output or uploaded
    # text, and characters such as '<' or '&' must be shown literally instead
    # of being parsed as markup.
    html_code = f"""
    {style}
    <div class='output-box'>
        <div class='header'>{escape(str(title))}</div>
        <pre>{escape(str(content))}</pre>
        {progress_bar}
    </div>
    """
    display(HTML(html_code))

# **6. Google Gemini Prompt and Embeddings**

In [9]:
def generate_score_and_justification(transcript_text, avg_word_len, punctuation_dens, pos_dens, sent_comp, rep_ratio, readability_score, model_name='gemini-pro'):
    """Ask a Gemini model to score a sales transcript and justify the score.

    Builds one prompt containing the transcript and the pre-computed
    linguistic metrics, sends it through `genai.GenerativeModel`, and returns
    the model's text reply.

    Args:
        transcript_text: The (pre-processed) transcript to analyse.
        avg_word_len: Average word length metric.
        punctuation_dens: Punctuation density metric.
        pos_dens: Part-of-speech density mapping.
        sent_comp: Sentence complexity metric.
        rep_ratio: Repetition ratio metric.
        readability_score: Readability metric.
        model_name: Name of the generative model to use. Defaults to
            'gemini-pro', the value that was previously hard-coded.

    Returns:
        The response text with every literal '****' removed and surrounding
        whitespace stripped.
    """
    prompt = f"""
    Analyze the following sales conversation transcript to determine the likelihood of the customer purchasing the course. Provide a score out of 100 for the likelihood of conversion. Also, justify the score with five bullet points, considering various aspects such as language quality, customer engagement, agent responsiveness, and any other relevant factors you identify.

    Transcript:
    {transcript_text}

    Additional Parameters:
    - Average Word Length: {avg_word_len}
    - Punctuation Density: {punctuation_dens}
    - Part-of-Speech Density: {pos_dens}
    - Sentence Complexity: {sent_comp}
    - Repetition Ratio: {rep_ratio}
    - Readability Score: {readability_score}

    After analyzing the text and parameters, provide a detailed score and justification:

    Use the provided parameters and transcript text to assess the likelihood of conversion. Consider factors such as the clarity and persuasiveness of language, the level of customer interest and engagement, the responsiveness and effectiveness of the agent, and any other relevant aspects that contribute to the likelihood of conversion.

    Conversion Score: _______/100
    Justification:
    - Bullet Point 1: Assess the language quality critically, considering any instances of jargon, unclear explanations, or overly salesy language.
    - Bullet Point 2: Evaluate customer engagement, highlighting any areas where the customer's interest waned or where the agent failed to address concerns adequately.
    - Bullet Point 3: Critique agent responsiveness and effectiveness, noting any instances of delayed responses, incomplete information, or lack of empathy.
    - Bullet Point 4: Identify potential obstacles to conversion, such as pricing concerns, uncertainty about course delivery, or customer objections that were not fully resolved.
    - Bullet Point 5: Consider the overall tone and atmosphere of the conversation, including any factors that may have positively or negatively influenced the customer's perception of the course and the agent's handling of the call.

    Additionally, analyze why the customer would be willing to buy the course and why they wouldn't during the conversation. Provide two bullet points for each scenario and justify which scenario is more likely to happen, based on the conversation analysis.

    Reasons Customer Would Buy the Course:
    - Bullet Point 1: Highlight the benefits and features of the course that align with the customer's needs and goals, emphasizing how it can help advance their career or skills.
    - Bullet Point 2: Address any concerns or objections raised by the customer, demonstrating how the course addresses those challenges effectively.

    Reasons Customer Wouldn't Buy the Course:
    - Bullet Point 1: Identify any unresolved concerns or objections raised by the customer that may prevent them from making a purchase decision.
    - Bullet Point 2: Consider any external factors or competing priorities mentioned by the customer that could impact their willingness or ability to enroll in the course.

    Justification for Likelihood of Conversion:
    - Provide a brief analysis comparing the reasons for buying and not buying the course based on the conversation. Justify which scenario is more likely to happen and why, considering the overall tone, customer engagement, and agent effectiveness during the conversation.

    Predictive Analysis:
    Based on the provided transcript and additional parameters, use your expertise to predict the likelihood of conversion. Consider factors such as the customer's level of interest, the agent's effectiveness in addressing concerns, and any potential obstacles to conversion.

    Salesperson Feedback:
    Lastly, provide feedback to the salesperson based on the transcript analysis. Highlight any mistakes made during the conversation and suggest improvements to enhance conversion rates. Justify your feedback with specific examples from the transcript.
    """

    # Named `llm` (not `model`) so it is not confused with the module-level
    # SentenceTransformer `model` used for embeddings.
    llm = genai.GenerativeModel(model_name)
    response = llm.generate_content(prompt)

    cleaned_text = response.text.replace("****", "")  # Remove any '****' from the output
    return cleaned_text.strip()



def generate_and_store_embeddings(text):
    """Embed `text` with the module-level `model` and add the result to `index`.

    The text is encoded as a single-item batch; the encoded batch is added to
    the module-level index and also returned to the caller.
    """
    batch = [text]
    vectors = model.encode(batch)
    index.add(vectors)
    return vectors


# **7. Direct Input**

In [10]:
# Main execution: analyse one transcript stored on disk
uploaded_file_path = '/content/rittick dutta.txt'
# Read as UTF-8 explicitly so decoding does not depend on the platform default
# (the widget upload path also treats transcripts as UTF-8).
with open(uploaded_file_path, 'r', encoding='utf-8') as file:
    content = file.read()

cleaned_text = preprocess_text(content)
updated_parameters = dynamic_parameter_tracking(cleaned_text)

# Generate embeddings and store them
embeddings = generate_and_store_embeddings(cleaned_text)

# Generate score and justification; the metric dict's keys match the
# function's keyword parameters, so it is splatted directly.
score_and_justification = generate_score_and_justification(cleaned_text, **updated_parameters)

# Display results
print("Score and Justification:")
print(score_and_justification)

Score and Justification:
**Conversion Score:** 60/100

**Justification:**

- **Bullet Point 1: Language Quality:** The language used is clear and concise, with minimal jargon or technical terms. The agent provides thorough explanations and avoids overly salesy language. However, there are instances where the agent's speech is broken and difficult to understand.
- **Bullet Point 2: Customer Engagement:** The customer seems interested in the course and engages actively with the agent. The agent responds to the customer's queries and concerns promptly, but sometimes fails to ask clarifying questions or acknowledge the customer's perspective fully.
- **Bullet Point 3: Agent Responsiveness and Effectiveness:** The agent is generally responsive, providing information and addressing the customer's questions. However, there are instances where the agent provides incomplete information or jumps between topics, which can be confusing for the customer.
- **Bullet Point 4: Potential Obstacles to C

# **8. IPython Widget**

In [11]:
def process_uploaded_content(file_content):
    """Analyse transcript text supplied by the user and render the results.

    The text is pre-processed, its linguistic metrics are computed, the model's
    score and justification are shown in an output box, and the metrics are
    plotted as a bar chart.
    """
    transcript = preprocess_text(file_content)
    metrics = dynamic_parameter_tracking(transcript)

    # Score/justification text first, shown in the styled box
    report = generate_score_and_justification(transcript, **metrics)
    display_output("Score and Justification", report)

    # Then the chart of the scalar linguistic metrics
    generate_graphs(metrics)


def process_uploaded_file(file_content):
    """Save uploaded content to a temporary file before processing.

    The text is written to a UTF-8 temporary file, read back, and the result is
    passed to `process_uploaded_content`. The temporary file is removed once it
    has been read, so repeated uploads do not accumulate files on disk.

    Args:
        file_content: The already-decoded transcript text.
    """
    with tempfile.NamedTemporaryFile(delete=False, mode="w", encoding="utf-8") as temp_file:
        temp_file.write(file_content)
        temp_file_path = temp_file.name  # Get the path of the temporary file

    try:
        with open(temp_file_path, "r", encoding="utf-8") as f:
            content = f.read()
    finally:
        # delete=False means nothing else will clean this file up
        os.remove(temp_file_path)

    process_uploaded_content(content)



# File upload widget: restricted to a single .txt file
upload_button = widgets.FileUpload(accept='.txt', multiple=False)

# Button that triggers the analysis of the uploaded transcript
process_button = widgets.Button(description="Analyze Transcript", button_style="success")

# Output area that the click handler renders its results into
output = widgets.Output()

def on_process_clicked(b):
    """Handle a click on the "Analyze Transcript" button.

    Clears the output area, then either analyses the uploaded transcript or
    shows an error box when nothing has been uploaded or the file is not valid
    UTF-8 text.

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    with output:
        output.clear_output()

        uploads = upload_button.value
        if not uploads:
            display_output("Error", "No file uploaded. Please upload a transcript file.")
            return

        # ipywidgets 7 exposes `value` as a dict keyed by filename, while
        # ipywidgets 8 exposes a tuple of per-file dicts; support both.
        if isinstance(uploads, dict):
            uploaded_file = next(iter(uploads.values()))
        else:
            uploaded_file = uploads[0]

        try:
            # bytes(...) accepts both `bytes` (v7) and `memoryview` (v8) content
            file_content = bytes(uploaded_file['content']).decode("utf-8")  # Decode uploaded file
        except UnicodeDecodeError:
            display_output("Error", "The uploaded file is not valid UTF-8 text.")
            return

        display_output("Processing File", "Analyzing transcript...")
        process_uploaded_file(file_content)  # Pass content instead of treating it as a file path

# Register the click handler on the "Analyze Transcript" button
process_button.on_click(on_process_clicked)

# Show the upload control, the button and the output area in this cell's output
display(upload_button, process_button, output)

FileUpload(value={}, accept='.txt', description='Upload')

Button(button_style='success', description='Analyze Transcript', style=ButtonStyle())

Output()