# Porter Stemmer

In [27]:
import pandas as pd
from nltk.stem import PorterStemmer

# Instantiate the Porter stemmer once; the same object handles every word below
porter_stemmer = PorterStemmer()

# Sample vocabulary to stem (some entries are repeated)
words = ["running", "jumps", "happily", "running", "happily", "fairly", "sportingly"]

# Run each word through the stemmer, preserving input order
stemmed_words = list(map(porter_stemmer.stem, words))

# Start from the original words, then attach their stems as a second column
df = pd.DataFrame({'Original': words}).assign(Stemmed=stemmed_words)

# Last expression of the cell, so the notebook renders the table
df

Unnamed: 0,Original,Stemmed
0,running,run
1,jumps,jump
2,happily,happili
3,running,run
4,happily,happili
5,fairly,fairli
6,sportingly,sportingli


# Regex Stemmer

In [35]:
from nltk.stem import RegexpStemmer

# Stemming rule: strip a trailing "ing" (the `$` anchors the pattern to the end of the word)
custom_rule = r'ing$'
regexp_stemmer = RegexpStemmer(custom_rule)

# Stem a single sample word
word = 'running'
stemmed_word = regexp_stemmer.stem(word)

# Report the word before and after stemming, one per line
print(f'Original Word: {word}', f'Stemmed Word: {stemmed_word}', sep='\n')


Original Word: running
Stemmed Word: runn


# Snowball Stemmer

In [25]:
from nltk.stem import SnowballStemmer

# Snowball stemmers are language-specific; this one is built for English
stemmer = SnowballStemmer('english')

# Sample vocabulary to stem
words_to_stem = ['running', 'jumped', 'happily', 'quickly', 'foxes', 'fairly', 'sportingly']

# Run each word through the Snowball stemmer, preserving input order
stemmed_words = list(map(stemmer.stem, words_to_stem))

# Two-column table: each input word next to its stem
df = pd.DataFrame(dict(Original=words_to_stem, Stemmed=stemmed_words))

# Last expression of the cell, so the notebook renders the table
df

Unnamed: 0,Original,Stemmed
0,running,run
1,jumped,jump
2,happily,happili
3,quickly,quick
4,foxes,fox
5,fairly,fair
6,sportingly,sport


# Lancaster Stemmer

In [29]:
from nltk.stem import LancasterStemmer

# Build the Lancaster stemmer used for every word in this cell
stemmer = LancasterStemmer()

# Sample vocabulary to stem
words_to_stem = ['running', 'jumped', 'happily', 'quickly', 'foxes', 'fairly', 'sportingly']

# Run each word through the Lancaster stemmer, preserving input order
stemmed_words = [*map(stemmer.stem, words_to_stem)]

# Two-column table: each input word next to its stem
df = pd.DataFrame.from_dict({'Original': words_to_stem, 'Stemmed': stemmed_words})

# Last expression of the cell, so the notebook renders the table
df

Unnamed: 0,Original,Stemmed
0,running,run
1,jumped,jump
2,happily,happy
3,quickly,quick
4,foxes,fox
5,fairly,fair
6,sportingly,sport


# Word Lemmatization

In [45]:
import nltk
import os
import sys

# Fetch the WordNet corpus used by WordNetLemmatizer; quiet=True suppresses the download log
nltk.download('wordnet', quiet=True)

from nltk.stem import WordNetLemmatizer

# Create one WordNetLemmatizer object and reuse it for every word
lemmatizer = WordNetLemmatizer()

# Single word lemmatization examples.
# lemmatize() is called without a `pos` argument, so every word is looked up
# as a noun (NLTK's default). Plurals such as 'kites' and 'feet' are reduced,
# while verb forms such as 'flying' or 'died' come back unchanged.
words = ['kites', 'babies', 'dogs', 'flying', 'smiling', 'driving', 'died', 'tried', 'feet']
for word in words:
    print(f"{word} ---> {lemmatizer.lemmatize(word)}")

kites ---> kite
babies ---> baby
dogs ---> dog
flying ---> flying
smiling ---> smiling
driving ---> driving
died ---> died
tried ---> tried
feet ---> foot


# Sentence Lemmatization

In [60]:
# Sentence lemmatization example: lemmatize a sentence token by token
string = 'Leaves are falling from the trees in autumn.'

# Split the sentence into tokens and show them.
# NOTE(review): word_tokenize relies on NLTK tokenizer data that this cell does
# not download -- confirm it is already installed in the environment.
words = nltk.word_tokenize(string)
print(words)

# Fresh lemmatizer instance for this cell
lemmatizer = WordNetLemmatizer()

# Lemmatize each token, then stitch the results back together with single spaces
lemmatized_string = ' '.join(map(lemmatizer.lemmatize, words))

print(lemmatized_string)



['Leaves', 'are', 'falling', 'from', 'the', 'trees', 'in', 'autumn', '.']
Leaves are falling from the tree in autumn .
