<a href="https://colab.research.google.com/github/xaltyPasta/RAG_Assignmrnt/blob/main/RAG_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Reference table of legal authorities, keyed by citation name.
# Every entry has a "type" ("code" or "case") and a "description";
# "case" entries additionally record the decision "year" and the "court".
legal_database = {
    "IRC § 501(c)(3)": {
        "type": "code",
        "description": "Internal Revenue Code - Defines religious organizations",
    },
    "Bob Jones University v. Simon": {
        "type": "case",
        "description": "Landmark case on religious freedom and tax exemption",
        "year": 1983,
        "court": "U.S.",
    },
    "Reynolds v. United States": {
        "type": "case",
        "description": "Case on commercial activity and tax-exempt status",
        "year": 1996,
        "court": "9th Cir.",
    },
    "Eastern Montana College of Education v. Helena": {
        "type": "case",
        "description": "Case on substantial vs. incidental commercial activity",
        "year": 1991,
        "court": "9th Cir.",
    },
    "Glock v. Commissioner": {
        "type": "case",
        "description": "Case on religious organizations engaging in commercial activities",
        "year": 1982,
        "court": "T.C.",
    },
    "Speakman v. Commissioner": {
        "type": "case",
        "description": "Case on ancillary commercial activities for religious purposes",
        "year": 1987,
        "court": "6th Cir.",
    },
    "Murdoch v. Commissioner": {
        "type": "case",
        "description": "Case on defining 'religious purpose' for tax exemption",
        "year": 1983,
        "court": "9th Cir.",
    },
    "Texas Heart Hospital of St. Luke's Episcopal Health Charities Inc. v. United States": {
        "type": "case",
        "description": "Case on religious hospitals and tax exemption",
        "year": 1992,
        "court": "5th Cir.",
    },
    "United States v. The Sanctuary": {
        "type": "case",
        "description": "Case on religious organizations providing social services",
        "year": 1995,
        "court": "9th Cir.",
    },
    "Hermitage Ministries Inc. v. Commissioner": {
        "type": "case",
        "description": "Case on religious organizations and fundraising activities",
        "year": 1979,
        "court": "T.C.",
    },
}

In [None]:
import spacy
import re

# Load spaCy's small English pipeline ("en_core_web_sm") once at cell level.
# NOTE(review): `nlp` is not referenced by any code visible in this file;
# confirm whether another cell needs it before keeping or removing the load.
nlp = spacy.load("en_core_web_sm")


def _citation_regex(case_name):
    """Compile a punctuation-tolerant pattern for one database case name."""
    pieces = []
    # Commas are dropped from the name and re-allowed optionally between
    # words, so "Foo Inc. v. Bar" also matches "Foo, Inc. v. Bar" in text.
    for token in case_name.replace(",", " ").split():
        if token in ("v", "v."):
            # Accept the versus marker with or without its period.
            pieces.append(r"v\.?")
        else:
            pieces.append(re.escape(token))
    body = r",?\s+".join(pieces)
    # Lookarounds instead of \b: a name may end in "." (e.g. "Inc."), where
    # \b would not hold.
    return re.compile(r"(?<!\w)" + body + r"(?!\w)")


def find_citations(text, database=None):
    """Return the known case citations mentioned in *text*.

    Each case name in the database (a key containing a ``v`` / ``v.`` word)
    is searched for directly in the text. This avoids two failure modes of
    extracting free-form "X v. Y" spans first and then looking them up:
    words preceding the citation being absorbed into the match, and party
    names containing commas or periods ("Foo, Inc.") never matching.

    Args:
        text: The text to scan. ``None`` or an empty string yields ``[]``.
        database: Iterable of known citation names (for example a dict keyed
            by name). Defaults to the module-level ``legal_database``.

    Returns:
        A list of database names, one per occurrence, in the order they
        appear in the text. The database spelling is returned even when the
        text punctuates the name differently.
    """
    if database is None:
        database = legal_database
    if not text:
        return []

    hits = []
    for case_name in database:
        words = case_name.split()
        # Only "X v. Y" style names are case citations; skip code sections.
        if "v." not in words and "v" not in words:
            continue
        for found in _citation_regex(case_name).finditer(text):
            hits.append((found.start(), found.end(), case_name))

    # Earliest occurrence first; when two names start at the same place,
    # prefer the longer one.
    hits.sort(key=lambda hit: (hit[0], -hit[1]))

    citations = []
    covered_until = 0
    for start, end, case_name in hits:
        # Drop a name that lies inside a span already attributed to another
        # citation (e.g. a database name that is a suffix of a longer one).
        if start >= covered_until:
            citations.append(case_name)
            covered_until = end

    return citations

# Sample text naming nine cases, separated by periods. Two of the names
# ("Hermitage Ministries, Inc. ..." and "... Health Charities, Inc. ...")
# are written with a comma before "Inc.".
text = "This is a sample text with citations: Reynolds v. United States. Bob Jones University v. Simon. United States v. The Sanctuary. Hermitage Ministries, Inc. v. Commissioner. Texas Heart Hospital of St. Luke's Episcopal Health Charities, Inc. v. United States. Murdoch v. Commissioner. Speakman v. Commissioner. Glock v. Commissioner. Eastern Montana College of Education v. Helena."

# Extract the citations that find_citations recognizes in the sample text
citations = find_citations(text)

# Print a header followed by one citation per line
print("Citations found:")
for citation in citations:
    print(citation)


Citations found:
Reynolds v. United States
Bob Jones University v. Simon
United States v. The Sanctuary
Murdoch v. Commissioner
Speakman v. Commissioner
Glock v. Commissioner
Eastern Montana College of Education v. Helena


In [None]:
def get_actual_predicted_citations(actual_citations, predicted_citations):
    """Return the two citation collections, unchanged, as a pair.

    Args:
        actual_citations: The reference (ground-truth) citations.
        predicted_citations: The citations produced by the extractor.

    Returns:
        The tuple ``(actual_citations, predicted_citations)``; the very same
        objects that were passed in, not copies.
    """
    pair = (actual_citations, predicted_citations)
    return pair

# Sample text naming nine cases, separated by periods
text = "This is a sample text with citations: Reynolds v. United States. Bob Jones University v. Simon. United States v. The Sanctuary. Hermitage Ministries, Inc. v. Commissioner. Texas Heart Hospital of St. Luke's Episcopal Health Charities, Inc. v. United States. Murdoch v. Commissioner. Speakman v. Commissioner. Glock v. Commissioner. Eastern Montana College of Education v. Helena."

# Ground truth: the nine case names, spelled as they are written in `text`
# (including the comma before "Inc."). Predictions come from find_citations.
actual_citations = ["Reynolds v. United States", "Bob Jones University v. Simon", "United States v. The Sanctuary", "Hermitage Ministries, Inc. v. Commissioner", "Texas Heart Hospital of St. Luke's Episcopal Health Charities, Inc. v. United States", "Murdoch v. Commissioner", "Speakman v. Commissioner", "Glock v. Commissioner", "Eastern Montana College of Education v. Helena"]
predicted_citations = find_citations(text)

# Unpack the (actual, predicted) pair into two separate names
actual_citations_list, predicted_citations_list = get_actual_predicted_citations(actual_citations, predicted_citations)

# Print each list under its own heading; the "\n" leaves a blank line
# between the two sections
print("Actual Citations:")
print(actual_citations_list)
print("\nPredicted Citations:")
print(predicted_citations_list)

Actual Citations:
['Reynolds v. United States', 'Bob Jones University v. Simon', 'United States v. The Sanctuary', 'Hermitage Ministries, Inc. v. Commissioner', "Texas Heart Hospital of St. Luke's Episcopal Health Charities, Inc. v. United States", 'Murdoch v. Commissioner', 'Speakman v. Commissioner', 'Glock v. Commissioner', 'Eastern Montana College of Education v. Helena']

Predicted Citations:
['Reynolds v. United States', 'Bob Jones University v. Simon', 'United States v. The Sanctuary', 'Murdoch v. Commissioner', 'Speakman v. Commissioner', 'Glock v. Commissioner', 'Eastern Montana College of Education v. Helena']


In [None]:
from sklearn.metrics import precision_score

def evaluate_precision(predicted_citations, actual_citations):
    """Return the precision of the predicted citations.

    Precision is the fraction of distinct predicted citations that also
    occur among the actual citations. The two collections are compared as
    sets, so the result does not depend on the order of either list and the
    lists may have different lengths. (Comparing them position by position
    as class labels would score a correct but reordered prediction as wrong
    and would require equal lengths.)

    Args:
        predicted_citations: Iterable of citations produced by the extractor.
        actual_citations: Iterable of reference (ground-truth) citations.

    Returns:
        A float in [0.0, 1.0]; 0.0 when there are no predictions.
    """
    predicted = set(predicted_citations)
    if not predicted:
        # No predictions: define precision as 0.0 rather than dividing by 0.
        return 0.0

    true_positives = len(predicted & set(actual_citations))
    return true_positives / len(predicted)

# Sample usage: seven predicted and seven actual citations that agree on
# every entry except the last ("Education v. Helena" vs.
# "Eastern Montana College of Education v. Helena").
predicted_citations = ["Reynolds v. United States", "Bob Jones University v. Simon", "United States v. The Sanctuary",
                       "Murdoch v. Commissioner", "Speakman v. Commissioner", "Glock v. Commissioner", "Education v. Helena"]

actual_citations = ["Reynolds v. United States", "Bob Jones University v. Simon", "United States v. The Sanctuary",
                    "Murdoch v. Commissioner", "Speakman v. Commissioner", "Glock v. Commissioner", "Eastern Montana College of Education v. Helena"]

# Truncate the predicted_citations list to match the length of actual_citations.
# Slicing can only shorten a longer list; both lists have seven entries
# here, so predicted_citations is left unchanged.
predicted_citations = predicted_citations[:len(actual_citations)]

precision = evaluate_precision(predicted_citations, actual_citations)
print("Precision:", precision)


Precision: 0.8571428571428571


In [None]:
from sklearn.metrics import recall_score

def evaluate_recall(predicted_citations, actual_citations):
    """Return the recall of the predicted citations.

    Recall is the fraction of distinct actual citations that were also
    predicted. The two collections are compared as sets, so the result does
    not depend on the order of either list and the lists may have different
    lengths. (Comparing them position by position as class labels would
    score a correct but reordered prediction as wrong and would require
    equal lengths.)

    Args:
        predicted_citations: Iterable of citations produced by the extractor.
        actual_citations: Iterable of reference (ground-truth) citations.

    Returns:
        A float in [0.0, 1.0]; 0.0 when there are no actual citations.
    """
    actual = set(actual_citations)
    if not actual:
        # Nothing to retrieve: define recall as 0.0 rather than dividing by 0.
        return 0.0

    true_positives = len(actual & set(predicted_citations))
    return true_positives / len(actual)

# Sample usage: seven predicted and seven actual citations that agree on
# every entry except the last ("Education v. Helena" vs.
# "Eastern Montana College of Education v. Helena").
predicted_citations = ["Reynolds v. United States", "Bob Jones University v. Simon", "United States v. The Sanctuary",
                       "Murdoch v. Commissioner", "Speakman v. Commissioner", "Glock v. Commissioner", "Education v. Helena"]

actual_citations = ["Reynolds v. United States", "Bob Jones University v. Simon", "United States v. The Sanctuary",
                    "Murdoch v. Commissioner", "Speakman v. Commissioner", "Glock v. Commissioner", "Eastern Montana College of Education v. Helena"]
# Truncate the predicted_citations list to match the length of actual_citations.
# Slicing can only shorten a longer list; both lists have seven entries
# here, so predicted_citations is left unchanged.
predicted_citations = predicted_citations[:len(actual_citations)]

recall = evaluate_recall(predicted_citations, actual_citations)
print("Recall:", recall)


Recall: 0.8571428571428571


In [None]:
from sklearn.metrics import f1_score

def evaluate_f1_score(predicted_citations, actual_citations):
    """Return the F1 score of the predicted citations.

    F1 is the harmonic mean of precision and recall, computed here in the
    equivalent form ``2 * TP / (|predicted| + |actual|)`` where TP is the
    number of distinct citations present in both collections. The
    collections are compared as sets, so the result does not depend on the
    order of either list and the lists may have different lengths.
    (Comparing them position by position as class labels would score a
    correct but reordered prediction as wrong and would require equal
    lengths.)

    Args:
        predicted_citations: Iterable of citations produced by the extractor.
        actual_citations: Iterable of reference (ground-truth) citations.

    Returns:
        A float in [0.0, 1.0]; 0.0 when both collections are empty.
    """
    predicted = set(predicted_citations)
    actual = set(actual_citations)

    denominator = len(predicted) + len(actual)
    if denominator == 0:
        # Nothing predicted and nothing expected: avoid dividing by zero.
        return 0.0

    true_positives = len(predicted & actual)
    return 2 * true_positives / denominator


# Sample usage: seven predicted and seven actual citations that agree on
# every entry except the last ("Education v. Helena" vs.
# "Eastern Montana College of Education v. Helena").
predicted_citations = ["Reynolds v. United States", "Bob Jones University v. Simon", "United States v. The Sanctuary",
                       "Murdoch v. Commissioner", "Speakman v. Commissioner", "Glock v. Commissioner", "Education v. Helena"]

actual_citations = ["Reynolds v. United States", "Bob Jones University v. Simon", "United States v. The Sanctuary",
                    "Murdoch v. Commissioner", "Speakman v. Commissioner", "Glock v. Commissioner", "Eastern Montana College of Education v. Helena"]
# Truncate the predicted_citations list to match the length of actual_citations.
# Slicing can only shorten a longer list; both lists have seven entries
# here, so predicted_citations is left unchanged.
predicted_citations = predicted_citations[:len(actual_citations)]

# Store the result in `f1`, not `f1_score`: rebinding `f1_score` would
# replace the function imported from sklearn.metrics with a number and
# break any later code in the notebook that calls it.
f1 = evaluate_f1_score(predicted_citations, actual_citations)
print("F1 Score:", f1)


F1 Score: 0.8571428571428571
