In [45]:
import pandas as pd              # importing pandas to clean and process the dataset

In [2]:
df = pd.read_csv("dict.csv")     # Read the CSV file into a pandas DataFrame

In [3]:
print(df)                        # Print the entire DataFrame

            word                                         definition
0         abbacy  The word "abbacy" refers to the office or juri...
1       abductor  The word "abductor" refers to a person or thin...
2           abas  The word "abas" does not have a widely recogni...
3      abasement  The word "abasement" refers to the action or e...
4       abampere  The term "abampere" is a unit of electric curr...
...          ...                                                ...
42047       worm  The word "worm" can refer to several related c...
42048       true  The word "true" has several meanings in Englis...
42049       whip  The word "whip" has several meanings in Englis...
42050       work  The word "work" has several meanings in Englis...
42051       wing  The word "wing" has several definitions in Eng...

[42052 rows x 2 columns]


In [4]:
pd.isna(df)                      # Check for missing values (NaN) in the DataFrame

Unnamed: 0,word,definition
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
42047,False,False
42048,False,False
42049,False,False
42050,False,False


In [5]:
df2 = df.copy()                            # Create a copy of the DataFrame
df2['word'] = df2['word'].astype(str)      # Ensure the 'word' column is of string type
df2['word'] = df2['word'].str.lower()      # Convert all words in the 'word' column to lowercase
df2                                        # Display the modified DataFrame

Unnamed: 0,word,definition
0,abbacy,"The word ""abbacy"" refers to the office or juri..."
1,abductor,"The word ""abductor"" refers to a person or thin..."
2,abas,"The word ""abas"" does not have a widely recogni..."
3,abasement,"The word ""abasement"" refers to the action or e..."
4,abampere,"The term ""abampere"" is a unit of electric curr..."
...,...,...
42047,worm,"The word ""worm"" can refer to several related c..."
42048,true,"The word ""true"" has several meanings in Englis..."
42049,whip,"The word ""whip"" has several meanings in Englis..."
42050,work,"The word ""work"" has several meanings in Englis..."


In [6]:
df2 = df2.sort_values(by='word', ascending=True)    # Sort the DataFrame by the 'word' column in ascending order
df2.reset_index(drop=True, inplace=True)            # Reset the index of the DataFrame
df2                                                 # Display the sorted and re-indexed DataFrame

Unnamed: 0,word,definition
0,a,"The word ""a"" is an indefinite article in Engli..."
1,a,"The word ""A"" is an indefinite article in Engli..."
2,a,"The word ""a"" is an indefinite article in Engli..."
3,aa,"The word ""aa"" is a term used in geology to des..."
4,aalii,"The word ""aalii"" refers to a type of shrub or ..."
...,...,...
42047,zymology,Zymology is the branch of science that deals w...
42048,zymolysis,Zymolysis is a biological process that refers ...
42049,zymosis,'Zymosis' refers to the process of fermentatio...
42050,zymurgy,Zymurgy is the branch of chemistry that deals ...


In [7]:
df2.describe(include='all')               # Generate descriptive statistics for the DataFrame, including all columns

Unnamed: 0,word,definition
count,42052,42052
unique,40404,42052
top,lead,"The term ""Zyrian"" generally refers to the Komi..."
freq,3,1


In [16]:
df2['word'].value_counts()                # Count the occurrences of each unique word in the 'word' column

Unnamed: 0_level_0,count
word,Unnamed: 1_level_1
lead,3
red,3
spring,3
art,3
as,3
...,...
fit,1
fitch,1
fitfulness,1
fitment,1


In [12]:
lead_rows = df2[df2['word'] == 'lead']          # Filter the DataFrame to get rows where the word is 'lead'
display(lead_rows)                              # Display the filtered rows
print(lead_rows.dtypes)                         # Print the data types of the columns in the filtered DataFrame

Unnamed: 0,word,definition
20915,lead,"The word ""lead"" can function as both a noun an..."
20916,lead,"The word ""lead"" has several meanings in Englis..."
20917,lead,"The word ""lead"" can have several meanings, dep..."


word          object
definition    object
dtype: object


In [26]:
df3 = df2.drop_duplicates(subset='word', keep='first')          # Drop duplicate rows based on the 'word' column, keeping the first occurrence
df3 = df3.reset_index(drop=True)                                # Reset the index of the new DataFrame
df3.describe(include='all')                                     # Generate descriptive statistics for the new DataFrame

Unnamed: 0,word,definition
count,40404,40404
unique,40404,40404
top,zyrian,"The term ""Zyrian"" generally refers to the Komi..."
freq,1,1


In [27]:
df3['word'].value_counts()                                      # Count the occurrences of each unique word in the 'word' column of the new DataFrame

Unnamed: 0_level_0,count
word,Unnamed: 1_level_1
zyrian,1
a,1
aa,1
aalii,1
zygocactus,1
...,...
abaca,1
aba,1
ab,1
aaron,1


In [46]:
df3.to_csv("Dictionary.csv")    # Save the revised and clean DataFrame to a CSV file
df3                             # Display the new DataFrame with duplicate words removed

Unnamed: 0,word,definition
0,a,"The word ""a"" is an indefinite article in Engli..."
1,aa,"The word ""aa"" is a term used in geology to des..."
2,aalii,"The word ""aalii"" refers to a type of shrub or ..."
3,aardvark,"An ""aardvark"" is a nocturnal mammal native to ..."
4,aardwolf,"The term ""aardwolf"" refers to a nocturnal, ins..."
...,...,...
40399,zymology,Zymology is the branch of science that deals w...
40400,zymolysis,Zymolysis is a biological process that refers ...
40401,zymosis,'Zymosis' refers to the process of fermentatio...
40402,zymurgy,Zymurgy is the branch of chemistry that deals ...


In [44]:
print("This is an english dictionary.")        # Print a welcome message
word = input("Enter the word: ")               # Get user input for the word to search
defn = df3[df3['word'] == word]                # Filter the DataFrame to find the word
if not defn.empty:                             # Check if the word was found
  for i in defn.iloc[0,1]:                     # Iterate through the characters of the definition
    if(i=='.'):                                # If the character is a period, print a period to move the succeding lines in the next line
      print('.')
    else:                                      # Otherwise, print the character without a newline
      print(i,end='')
else: # If the word was not found
  print("Word not found in the dictionary.")   # Print a message indicating the word was not found

This is an english dictionary.
Enter the word: mob
The word "mob" can be defined as follows:

1.
 **Noun**: A large and disorderly group of people, often involved in unlawful or violent activities.
 For example, a mob may gather to protest or riot.

   
2.
 **Noun**: In a more general sense, it can refer to any large crowd or mass of people.


3.
 **Noun (informal)**: It can also refer to a group of people with a common interest or characteristic, such as a "mob" of friends.


4.
 **Verb**: To move in a large crowd or swarm; to surround or attack someone in a group.
 For instance, "The fans mobbed the celebrity.
"

The term can carry a negative connotation, especially when associated with illegal or violent behavior.
