In [33]:
import os

import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from spacy.matcher import Matcher

In [34]:
# Load the dataset.
# The CSV location defaults to the original author's local path, but it can be
# overridden through the ACTIVE_PASSIVE_CSV environment variable so the notebook
# also runs on machines where that absolute path does not exist.
ACTIVE_PASSIVE_CSV = os.environ.get(
    "ACTIVE_PASSIVE_CSV",
    r"C:\Users\amnsh\OneDrive\Desktop\NLP Lab\Datasets\Act_Pas\active_passive.csv",
)
active_passive = pd.read_csv(ACTIVE_PASSIVE_CSV)

In [35]:
# Load spaCy's small English pipeline. `nlp` is the shared parser that the
# functions in this notebook call to tokenize and lemmatize sentences.
nlp = spacy.load("en_core_web_sm")

def preprocess_text(text):
    """Return `text` with every token replaced by its lemma.

    The text is parsed with the module-level spaCy pipeline `nlp`; the lemmas
    of all tokens (nothing is filtered out) are joined with single spaces.
    """
    lemmas = []
    for token in nlp(text):
        lemmas.append(token.lemma_)
    return " ".join(lemmas)

# Add a lemmatized copy of each voice column
# ("Active_processed" and "Passive_processed").
for voice in ("Active", "Passive"):
    active_passive[f"{voice}_processed"] = active_passive[voice].apply(preprocess_text)

# Build one flat corpus: every processed active sentence first, then every
# processed passive sentence, with the label list laid out in the same order.
n_pairs = len(active_passive)
X = [
    *active_passive['Active_processed'].tolist(),
    *active_passive['Passive_processed'].tolist(),
]
y = ['Active'] * n_pairs + ['Passive'] * n_pairs

# Hold out 20% of the sentences for testing; the seed is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [36]:
# Feature extraction using TF-IDF: learn the vocabulary and IDF weights from
# the training sentences only, then map both splits into that feature space.
tfidf_vectorizer = TfidfVectorizer()
tfidf_vectorizer.fit(X_train)
X_train_tfidf, X_test_tfidf = (
    tfidf_vectorizer.transform(texts) for texts in (X_train, X_test)
)

In [37]:
def identify_voice_dependency(sentence):
    """Classify `sentence` as "Passive" or "Active" from its dependency parse.

    The sentence is parsed with the module-level spaCy pipeline `nlp` and two
    rules are tried in order; "Active" is returned when neither matches.

    Returns:
        The string "Passive" or "Active".
    """
    tokens = nlp(sentence)
    be_forms = ("be", "am", "is", "are", "was", "were", "been")

    # Rule 1: an auxiliary labelled as a passive auxiliary that attaches to a verb.
    if any(
        tok.pos_ == "AUX" and tok.dep_ == "auxpass" and tok.head.pos_ == "VERB"
        for tok in tokens
    ):
        return "Passive"

    # Rule 2: a form of "be" labelled as a copula that attaches to a verb.
    # NOTE(review): spaCy's English models do not appear to emit a "cop"
    # label, so this rule may never fire with en_core_web_sm -- confirm.
    if any(
        tok.lemma_ in be_forms and tok.dep_ == "cop" and tok.head.pos_ == "VERB"
        for tok in tokens
    ):
        return "Passive"

    return "Active"

In [40]:
def identify_voice(sentence):
    """Classify `sentence` as "Passive" or "Active".

    The sentence is parsed with the module-level spaCy pipeline `nlp`. It is
    reported as passive when a form of "be" carries the `auxpass` (passive
    auxiliary) dependency label, e.g. "was" in "was chased".

    The plain `aux` label is deliberately not accepted: it marks ordinary
    auxiliaries such as the progressive "is" in "is chasing", which is active
    voice.

    Args:
        sentence: The raw sentence text.

    Returns:
        The string "Passive" or "Active".
    """
    passive_indicators = {"is", "am", "are", "was", "were", "be", "been", "being"}

    for token in nlp(sentence):
        if token.dep_ == "auxpass" and token.text.lower() in passive_indicators:
            return "Passive"

    return "Active"


In [41]:
def transform_sentence(sentence):
    """Return the opposite-voice counterpart of `sentence` from the dataset.

    The sentence is lemmatized with `preprocess_text` and compared against the
    processed columns of the module-level `active_passive` DataFrame:

    * a match in 'Active_processed' returns that row's 'Passive' sentence;
    * a match in 'Passive_processed' returns that row's 'Active' sentence.

    The column matching the voice predicted by `identify_voice` is searched
    first. The other column is then tried as a fallback, so a misclassified
    sentence that is present in the dataset is still transformed.

    Args:
        sentence: The raw sentence text.

    Returns:
        The counterpart sentence from the first matching row, or `sentence`
        itself when neither column contains it.
    """
    # Lemmatize once; the same processed form is compared against both columns.
    processed = preprocess_text(sentence)

    # (column to search, column to return) for each direction.
    active_to_passive = ('Active_processed', 'Passive')
    passive_to_active = ('Passive_processed', 'Active')
    if identify_voice(sentence) == 'Active':
        lookups = (active_to_passive, passive_to_active)
    else:
        lookups = (passive_to_active, active_to_passive)

    for search_column, result_column in lookups:
        matching_row = active_passive.loc[
            active_passive[search_column] == processed, result_column
        ]
        if len(matching_row) > 0:
            return matching_row.iloc[0]

    return sentence  # Return the original sentence if no match is found


In [42]:
# Transformation function backed by the dataset lookup in `transform_sentence`
def transform_to_opposite_voice(sentence):
    """Return `sentence` rewritten in the opposite voice.

    This is a thin wrapper around `transform_sentence`, which predicts the
    voice itself and does the lookup. The sentence is therefore not classified
    a second time here.

    Args:
        sentence: The raw sentence text.

    Returns:
        Whatever `transform_sentence(sentence)` returns.
    """
    return transform_sentence(sentence)

In [44]:
# Example usage: classify one sample sentence and show the predicted voice.
user_input = "The cat chased the mouse."
predicted_voice = identify_voice_dependency(sentence=user_input)
print(f"Predicted Voice: {predicted_voice}")

Predicted Voice: Active


In [58]:
import spacy
from collections import defaultdict

# Load spaCy's small English pipeline. This rebinds the module-level `nlp`
# that an earlier cell already created from the same model name.
nlp = spacy.load("en_core_web_sm")


def identify_voice_dependency(sentence):
    """Label `sentence` as "Passive" or "Active" using dependency labels.

    A sentence is "Passive" when some token whose head is a verb is either
    (a) an auxiliary with the `auxpass` label, or (b) a "be" form with the
    `cop` label. Otherwise it is "Active". Parsing uses the module-level
    spaCy pipeline `nlp`.

    Returns:
        The string "Passive" or "Active".
    """
    be_lemmas = ("be", "am", "is", "are", "was", "were", "been")

    for tok in nlp(sentence):
        # Both rules require the token to attach to a verb.
        if tok.head.pos_ != "VERB":
            continue
        # (a) passive auxiliary attached to its participle
        if tok.pos_ == "AUX" and tok.dep_ == "auxpass":
            return "Passive"
        # (b) copular "be" attached to a verb
        # NOTE(review): the `cop` label may not occur in spaCy's English
        # models -- confirm whether this rule is reachable.
        if tok.dep_ == "cop" and tok.lemma_ in be_lemmas:
            return "Passive"

    return "Active"


# lemma -> (simple past, past participle) for common irregular verbs.
# Verbs not listed here are treated as regular.
_IRREGULAR_VERBS = {
    "break": ("broke", "broken"),
    "bring": ("brought", "brought"),
    "build": ("built", "built"),
    "buy": ("bought", "bought"),
    "catch": ("caught", "caught"),
    "choose": ("chose", "chosen"),
    "do": ("did", "done"),
    "draw": ("drew", "drawn"),
    "drive": ("drove", "driven"),
    "eat": ("ate", "eaten"),
    "find": ("found", "found"),
    "give": ("gave", "given"),
    "hold": ("held", "held"),
    "keep": ("kept", "kept"),
    "know": ("knew", "known"),
    "make": ("made", "made"),
    "read": ("read", "read"),
    "see": ("saw", "seen"),
    "sell": ("sold", "sold"),
    "send": ("sent", "sent"),
    "sing": ("sang", "sung"),
    "speak": ("spoke", "spoken"),
    "steal": ("stole", "stolen"),
    "take": ("took", "taken"),
    "teach": ("taught", "taught"),
    "tell": ("told", "told"),
    "throw": ("threw", "thrown"),
    "win": ("won", "won"),
    "write": ("wrote", "written"),
}

# Penn Treebank tags that mark a plural noun.
_PLURAL_NOUN_TAGS = ("NNS", "NNPS")


def _ends_in_consonant_y(word):
    """Return True when `word` ends in a consonant followed by "y"."""
    return len(word) > 1 and word.endswith("y") and word[-2] not in "aeiou"


def _regular_past(lemma):
    """Return the regular "-ed" form of a base verb (no consonant doubling)."""
    if lemma.endswith("e"):
        return lemma + "d"
    if _ends_in_consonant_y(lemma):
        return lemma[:-1] + "ied"
    return lemma + "ed"


def _verb_forms(token):
    """Return `(simple past, past participle)` for a verb token."""
    lemma = token.lemma_.lower()
    if lemma in _IRREGULAR_VERBS:
        return _IRREGULAR_VERBS[lemma]
    if token.tag_ in ("VBD", "VBN"):
        # Already an inflected regular form: reuse it so spellings such as
        # "stopped" survive instead of being rebuilt from the lemma.
        form = token.text.lower()
    else:
        form = _regular_past(lemma)
    return form, form


def _third_person_singular(lemma):
    """Return the third-person-singular present form of a base verb."""
    if lemma == "have":
        return "has"
    if lemma.endswith(("s", "sh", "ch", "x", "z", "o")):
        return lemma + "es"
    if _ends_in_consonant_y(lemma):
        return lemma[:-1] + "ies"
    return lemma + "s"


def _active_to_passive(doc, sentence):
    """Build "<object> <be> <participle> by <subject>" from an active parse."""
    subject = None
    direct_object = None
    for token in doc:
        if token.dep_ == "nsubj":
            subject = token
        elif token.dep_ == "dobj":
            direct_object = token

    # First verb of the sentence; None (instead of an IndexError) if absent.
    verb = next((token for token in doc if token.pos_ == "VERB"), None)
    if subject is None or direct_object is None or verb is None:
        return sentence  # Could not identify subject-verb-object

    # The new subject (the old object) decides the number of "be";
    # the original verb decides its tense.
    plural = direct_object.tag_ in _PLURAL_NOUN_TAGS
    if verb.tag_ == "VBD":
        be_form = "were" if plural else "was"
    else:
        be_form = "are" if plural else "is"

    participle = _verb_forms(verb)[1]
    return f"{direct_object.text} {be_form} {participle} by {subject.text}"


def _passive_to_active(doc, sentence):
    """Build "<agent> <verb> <patient>" from a passive parse."""
    aux_verb = None
    participle = None
    patient = None
    agent = None
    for token in doc:
        if token.pos_ == "AUX" and token.dep_ == "auxpass":
            aux_verb = token
            participle = token.head  # the passive auxiliary hangs off the participle
        elif token.dep_ == "nsubjpass":
            patient = token  # grammatical subject of the passive = thing acted upon
        elif token.dep_ == "pobj" and token.head.dep_ == "agent":
            agent = token  # object of the "by" phrase = the doer

    if aux_verb is None or participle is None or patient is None or agent is None:
        return sentence  # Could not identify necessary elements (e.g. no "by" phrase)

    if aux_verb.text.lower() in ("was", "were"):
        verb = _verb_forms(participle)[0]
    elif agent.tag_ in _PLURAL_NOUN_TAGS:
        verb = participle.lemma_.lower()
    else:
        verb = _third_person_singular(participle.lemma_.lower())

    return f"{agent.text} {verb} {patient.text}"


def transform_to_opposite_voice(sentence):
    """Rewrite a simple sentence in the opposite grammatical voice.

    The voice is predicted with `identify_voice_dependency`, and the sentence
    is parsed with the module-level spaCy pipeline `nlp`.

    * Active -> passive: "<object> is/are/was/were <participle> by <subject>".
      The participle comes from the verb's lemma, so an already inflected verb
      is not given a second "-ed".
    * Passive -> active: "<agent> <verb> <patient>". The agent is the object
      of the "by" phrase and the patient is the passive subject. The verb is
      simple past after "was"/"were", otherwise simple present agreeing with
      the agent's number.

    Limitations: only the head word of each noun phrase is kept (determiners
    and modifiers are dropped, and no capitalization or punctuation is added);
    pronoun case is not adjusted; compound tenses and modals are reduced to
    simple present or past; irregular verbs outside the built-in table and
    consonant doubling from a bare lemma are not handled.

    Args:
        sentence: The raw sentence text.

    Returns:
        The transformed sentence, or `sentence` unchanged when the required
        elements (subject/verb/object, or auxiliary/participle/agent/patient)
        cannot be found.
    """
    predicted_voice = identify_voice_dependency(sentence)
    doc = nlp(sentence)

    if predicted_voice == 'Active':
        return _active_to_passive(doc, sentence)
    return _passive_to_active(doc, sentence)


# Demo: run the transformer on one sample sentence and show input and output.
user_input = "The teacher reject the project."
transformed_sentence = transform_to_opposite_voice(user_input)
print(
    f"Original Sentence: {user_input}",
    f"Transformed Sentence: {transformed_sentence}",
    sep="\n",
)


Original Sentence: The teacher reject the project.
Transformed Sentence: project is rejected by teacher
