# with time & memory testing

In [1]:
import sys
import os

# Get the root of the project (i.e., one level up from 'notebooks/')
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.insert(0, project_root)

print("Project root added to sys.path:", project_root)


Project root added to sys.path: /home/iiserb/Desktop/tabasco-fastapi


In [None]:
from nltk.corpus import stopwords, wordnet as wn

# Pre-load stopwords and noun vocabulary once
STOP_WORDS = set(stopwords.words("english"))
ALL_NOUNS = {lemma.name().lower() for synset in wn.all_synsets(wn.NOUN) for lemma in synset.lemmas()}

from src.services.preprocessing import _basic_clean_text
from src.services import extract_top_n_nouns_with_frequency

import time
import tracemalloc

def timed_memory_profile(func, *args, **kwargs):
    tracemalloc.start()
    start_time = time.perf_counter()

    result = func(*args, **kwargs)

    end_time = time.perf_counter()
    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    print(f"Execution time: {end_time - start_time:.4f} seconds")
    print(f"Current memory usage: {current / 1024:.2f} KB")
    print(f"Peak memory usage: {peak / 1024:.2f} KB")

    return result

## 1. Archaic Tests (Although model was not made for this) - Used shakespeare book

In [3]:


with open('../static/uploads/Shakespeare-Complete-Works.txt') as f:
    text_content = f.read()

print(len(text_content))

5473240


In [4]:
result = timed_memory_profile(extract_top_n_nouns_with_frequency, text_content, 1, STOP_WORDS, ALL_NOUNS)
print(result)

Execution time: 78.5741 seconds
Current memory usage: 29652.23 KB
Peak memory usage: 207442.19 KB
{'lord': 3005}


# 2. A more modern test on Psychology book, which the noun tagger was made for

In [6]:
with open('../static/uploads/SSC GD result 2025 declared at ssc.gov.in, check cut-off marks and download PDF - Times of India.txt') as f:
    text_content = f.read()

print(len(text_content))

4022


In [8]:
result = timed_memory_profile(extract_top_n_nouns_with_frequency, text_content, 10, STOP_WORDS, ALL_NOUNS)
print(result)

Execution time: 0.0408 seconds
Current memory usage: 2.19 KB
Peak memory usage: 74.76 KB
{'result': 18, 'gd': 15, 'marks': 11, 'cutoff': 9, 'step': 6, 'constable': 5, 'list': 4, 'answer': 4, 'commission': 3, 'examination': 3}


# 3. Test 3: A short corpus text of psychology

In [7]:
text_content2 = """Psychology is the scientific study of behavior and mental processes. It explores how individuals think, feel, and act both independently and within social contexts. Modern psychology covers a wide range of topics including cognition, emotion, motivation, personality, development, and mental health. Researchers use various methods such as experiments, observations, and surveys to understand human behavior. Advances in neuroscience have also deepened our understanding of the brain mechanisms underlying cognitive functions and emotional regulation. Applied psychology branches include clinical psychology, counseling, educational psychology, and industrial-organizational psychology, all aiming to improve well-being and performance across different settings.
"""

In [10]:
result = timed_memory_profile(extract_top_n_nouns_with_frequency, text_content, 10, STOP_WORDS, ALL_NOUNS)
print(result)

Execution time: 0.0366 seconds
Current memory usage: 1.44 KB
Peak memory usage: 73.42 KB
{'result': 18, 'gd': 15, 'marks': 11, 'cutoff': 9, 'step': 6, 'constable': 5, 'list': 4, 'answer': 4, 'commission': 3, 'examination': 3}


# Testing Time & Memory of the Preprocessing API

In [12]:
text = """Psychology is the scientific study of behavior and mental processes. ..."""

result = timed_memory_profile(extract_top_n_nouns_with_frequency, text, 10, STOP_WORDS, ALL_NOUNS)
print(result)


Execution time: 0.0008 seconds
Current memory usage: 0.36 KB
Peak memory usage: 5.48 KB
{'psychology': 1, 'study': 1, 'behavior': 1}
